; llvm/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s

; OBJDUMP: Contents of section .rodata:
; OBJDUMP-NEXT: 0000 00000000 00000000 10010000 00000000  ................
; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NOT:  0030 0000af00 94130000 1a000400 00000000  ................
; OBJDUMP-NEXT: 0030 4000af00 94130000 1a000400 00000000  @...............

; ASM-LABEL: amdhsa_kernarg_preload_4_implicit_6:
; ASM: .amdhsa_user_sgpr_count 10
; ASM: .amdhsa_next_free_sgpr 10
; ASM: ; TotalNumSgprs: 16
; ASM: ; NumSGPRsForWavesPerEU: 16

; Test that preloaded SGPRs which are not explicitly referenced in the kernel
; are still counted in the GRANULATED_WAVEFRONT_SGPR_COUNT field. This test has
; 6 implicit user SGPRs enabled, 4 preloaded kernarg SGPRs, plus 6 extra SGPRs
; allocated for flat scratch, etc. The total number of allocated SGPRs encoded
; in the kernel descriptor should be 16. That's a 1 in the KD field since the
; granule size is 8 and it's NumGranules - 1. The encoding for that looks like
; '40'.

; Empty kernel taking one unnamed i128 inreg argument and no attribute group.
; The body never references the argument; it just returns.
define amdgpu_kernel void @amdhsa_kernarg_preload_4_implicit_6(i128 inreg) {
  ret void
}

; OBJDUMP-NEXT: 0040 00000000 00000000 20010000 00000000  ........ .......
; OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 0070 4000af00 94000000 08000800 00000000  @...............

; ASM-LABEL: amdhsa_kernarg_preload_8_implicit_2:
; ASM: .amdhsa_user_sgpr_count 10
; ASM: .amdhsa_next_free_sgpr 10
; ASM: ; TotalNumSgprs: 16
; ASM: ; NumSGPRsForWavesPerEU: 16

; Only the kernarg_ptr is enabled so we should have 8 preload kernarg SGPRs, 2
; implicit, and 6 extra.

; Empty kernel taking one unnamed i256 inreg argument, using attribute group #0.
; The body never references the argument; it just returns.
define amdgpu_kernel void @amdhsa_kernarg_preload_8_implicit_2(i256 inreg) #0 {
  ret void
}

; OBJDUMP-NEXT: 0080 00000000 00000000 08010000 00000000  ................
; OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 00b0 4000af00 86000000 08000100 00000000  @...............

; ASM-LABEL: amdhsa_kernarg_preload_1_implicit_2:
; ASM: .amdhsa_user_sgpr_count 3
; ASM: .amdhsa_next_free_sgpr 3
; ASM: ; TotalNumSgprs: 9
; ASM: ; NumSGPRsForWavesPerEU: 9

; 1 preload, 2 implicit, 6 extra. Rounds up to 16 SGPRs in the KD.

; Empty kernel taking one unnamed i32 inreg argument, using attribute group #0.
; The body never references the argument; it just returns.
define amdgpu_kernel void @amdhsa_kernarg_preload_1_implicit_2(i32 inreg) #0 {
  ret void
}

; OBJDUMP-NEXT: 00c0 00000000 00000000 08010000 00000000  ................
; OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000  ................
; OBJDUMP-NEXT: 00f0 0000af00 84000000 08000000 00000000  ................

; ASM-LABEL: amdhsa_kernarg_preload_0_implicit_2:
; ASM: .amdhsa_user_sgpr_count 2
; ASM: .amdhsa_next_free_sgpr 0
; ASM: ; TotalNumSgprs: 6
; ASM: ; NumSGPRsForWavesPerEU: 6

; 0 preload kernarg SGPRs, 2 implicit, 6 extra. Rounds up to 8 SGPRs in the KD.
; Encoded like '00'.

; Empty kernel taking one unnamed i32 argument WITHOUT inreg, using attribute
; group #0. This is the only kernel in the file whose argument is not inreg.
define amdgpu_kernel void @amdhsa_kernarg_preload_0_implicit_2(i32) #0 {
  ret void
}

; Attribute group shared by the *_implicit_2 kernels. It is a list of
; "amdgpu-no-*" string attributes plus "uniform-work-group-size"="false".
; NOTE(review): presumably these mark the named implicit inputs as unused so
; that only the kernarg pointer stays enabled - confirm against the AMDGPU docs.
attributes #0 = {
  "amdgpu-no-agpr"
  "amdgpu-no-completion-action"
  "amdgpu-no-default-queue"
  "amdgpu-no-dispatch-id"
  "amdgpu-no-dispatch-ptr"
  "amdgpu-no-heap-ptr"
  "amdgpu-no-hostcall-ptr"
  "amdgpu-no-lds-kernel-id"
  "amdgpu-no-multigrid-sync-arg"
  "amdgpu-no-queue-ptr"
  "amdgpu-no-workgroup-id-x"
  "amdgpu-no-workgroup-id-y"
  "amdgpu-no-workgroup-id-z"
  "amdgpu-no-workitem-id-x"
  "amdgpu-no-workitem-id-y"
  "amdgpu-no-workitem-id-z"
  "uniform-work-group-size"="false"
}