; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL12 %s
; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL12 %s
; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL10 %s
; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL10 %s
; This shouldn't be too different from wave32, so we'll only test one case.
; Test function @basic uses the amdgpu_cs_chain calling convention. It calls
; llvm.amdgcn.init.whole.wave in %entry, conditionally runs %shader, and ends
; in %tail with a call to llvm.amdgcn.cs.chain targeting %callee.
;
; Arguments:
;   %sgpr, %callee, %exec - marked inreg; forwarded unchanged to the chain call.
;   %vgpr                 - struct whose fields 2 and 3 are overwritten in %shader.
;   %x                    - input to the add and to set.inactive.
;   %y                    - not referenced anywhere in the body.
;
; The assertion lines below are autogenerated (see the NOTE on the first line
; of the file); regenerate them with the script instead of editing by hand.
define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64 inreg %exec, { i32, ptr addrspace(5), i32, i64 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: basic:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1
; GISEL12-NEXT: s_mov_b32 s8, s3
; GISEL12-NEXT: s_mov_b32 s9, s4
; GISEL12-NEXT: s_mov_b32 s4, s5
; GISEL12-NEXT: s_mov_b32 s5, s6
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b64 s[6:7], s[10:11]
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11]
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s12
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GISEL12-NEXT: v_mov_b32_e32 v1, s13
; GISEL12-NEXT: s_mov_b64 exec, s[10:11]
; GISEL12-NEXT: v_mov_b32_e32 v11, v0
; GISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v13
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GISEL12-NEXT: v_mov_b32_e32 v12, v1
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL12-NEXT: s_mov_b64 exec, s[4:5]
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[8:9]
;
; DAGISEL12-LABEL: basic:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1
; DAGISEL12-NEXT: s_mov_b32 s7, s6
; DAGISEL12-NEXT: s_mov_b32 s6, s5
; DAGISEL12-NEXT: s_mov_b32 s5, s4
; DAGISEL12-NEXT: s_mov_b32 s4, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11]
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0
; DAGISEL12-NEXT: s_mov_b64 exec, s[10:11]
; DAGISEL12-NEXT: v_mov_b32_e32 v11, s12
; DAGISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v13
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; DAGISEL12-NEXT: v_mov_b32_e32 v12, s13
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_or_b64 exec, exec, s[8:9]
; DAGISEL12-NEXT: s_mov_b64 exec, s[6:7]
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[4:5]
;
; GISEL10-LABEL: basic:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1
; GISEL10-NEXT: s_mov_b32 s8, s3
; GISEL10-NEXT: s_mov_b32 s9, s4
; GISEL10-NEXT: s_mov_b32 s4, s5
; GISEL10-NEXT: s_mov_b32 s5, s6
; GISEL10-NEXT: s_and_saveexec_b64 s[6:7], s[10:11]
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11]
; GISEL10-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s12
; GISEL10-NEXT: v_mov_b32_e32 v1, s13
; GISEL10-NEXT: s_mov_b64 exec, s[10:11]
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v13
; GISEL10-NEXT: v_mov_b32_e32 v12, v1
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL10-NEXT: s_mov_b64 exec, s[4:5]
; GISEL10-NEXT: s_setpc_b64 s[8:9]
;
; DAGISEL10-LABEL: basic:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1
; DAGISEL10-NEXT: s_mov_b32 s7, s6
; DAGISEL10-NEXT: s_mov_b32 s6, s5
; DAGISEL10-NEXT: s_mov_b32 s5, s4
; DAGISEL10-NEXT: s_mov_b32 s4, s3
; DAGISEL10-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11]
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s[12:13], 0, v0
; DAGISEL10-NEXT: s_mov_b64 exec, s[10:11]
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s12
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v13
; DAGISEL10-NEXT: v_mov_b32_e32 v12, s13
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b64 exec, exec, s[8:9]
; DAGISEL10-NEXT: s_mov_b64 exec, s[6:7]
; DAGISEL10-NEXT: s_setpc_b64 s[4:5]
entry:
; The i1 returned by init.whole.wave picks the successor, either %shader (true)
; or straight to %tail (false).
; NOTE(review): per the AMDGPU usage guide the intrinsic is expected to enable
; every lane and return true only for lanes that were active on entry; confirm
; against the docs. The expected output starts with an s_or_saveexec_b64 of -1.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Ordinary computation: %x + 42 replaces field 2 of the %vgpr struct.
%nonwwm = add i32 %x, 42
%vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i64} %vgpr, i32 %nonwwm, 2
; set.inactive is given %x and the constant 71, which appears as 0x47 in the
; expected v_cndmask_b32 instruction.
; NOTE(review): presumably 71 is the value seen in inactive lanes; confirm.
%full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
; Ballot over (value != 0), passed through strict.wwm and stored in the i64
; field 3 of the struct.
%non.zero = icmp ne i32 %full.vgpr, 0
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %non.zero)
%wwm = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %ballot)
%vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i64} %vgpr.1, i64 %wwm, 3
br label %tail
tail:
; Merge point: the phi yields the untouched %vgpr when arriving from %entry
; and the updated struct when arriving from %shader.
%vgpr.args = phi { i32, ptr addrspace(5), i32, i64} [%vgpr, %entry], [%vgpr.2, %shader]
; Chain to %callee, forwarding %exec, %sgpr (inreg) and the merged struct. The
; trailing i32 0 is presumably the flags operand (TODO confirm in the docs).
; The call is followed directly by unreachable.
call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i64 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i64 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i64 } %vgpr.args, i32 0)
unreachable
}