; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL12 %s
; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL12 %s
; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL10 %s
; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL10 %s
; Baseline case: llvm.amdgcn.init.whole.wave splits the function into a %shader
; block and a %tail block, and %shader contains only plain adds (no WWM
; intrinsics).
define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: basic:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: v_add_nc_u32_e32 v12, 42, v12
; GISEL12-NEXT: v_add_nc_u32_e32 v8, 5, v8
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL12-NEXT: v_add_nc_u32_e32 v11, 32, v12
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: basic:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: v_add_nc_u32_e32 v12, 42, v12
; DAGISEL12-NEXT: v_add_nc_u32_e32 v8, 5, v8
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; DAGISEL12-NEXT: v_add_nc_u32_e32 v11, 32, v12
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: basic:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: v_add_nc_u32_e32 v12, 42, v12
; GISEL10-NEXT: v_add_nc_u32_e32 v8, 5, v8
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: v_add_nc_u32_e32 v11, 32, v12
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: basic:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: v_add_nc_u32_e32 v12, 42, v12
; DAGISEL10-NEXT: v_add_nc_u32_e32 v8, 5, v8
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: v_add_nc_u32_e32 v11, 32, v12
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
; Branch on the i1 returned by init.whole.wave: true goes to %shader, false
; skips directly to %tail.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Modify %x and field 0 of the %vgpr struct.
%newx = add i32 %x, 42
%oldval = extractvalue { i32, ptr addrspace(5), i32, i32 } %vgpr, 0
%newval = add i32 %oldval, 5
%newvgpr = insertvalue { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %newval, 0
br label %tail
tail:
; Merge the values coming from %entry (unmodified) and %shader (modified).
%full.x = phi i32 [%x, %entry], [%newx, %shader]
%full.vgpr = phi { i32, ptr addrspace(5), i32, i32 } [%vgpr, %entry], [%newvgpr, %shader]
; This add is in %tail, so it is applied on both incoming paths; the result is
; stored in field 3 of the struct forwarded to the callee.
%modified.x = add i32 %full.x, 32
%vgpr.args = insertvalue { i32, ptr addrspace(5), i32, i32 } %full.vgpr, i32 %modified.x, 3
; Hand off to %callee through llvm.amdgcn.cs.chain, passing %exec, %sgpr and
; the updated struct.
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
unreachable
}
; %shader mixes a plain add with a set.inactive / ballot / strict.wwm sequence.
; Both results are merged with the incoming %x and %y by scalar phis in %tail
; and written to fields 2 and 3 of the struct passed to the callee.
define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: wwm_in_shader:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: v_dual_mov_b32 v10, v12 :: v_dual_mov_b32 v11, v13
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v10
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: wwm_in_shader:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: v_dual_mov_b32 v11, v13 :: v_dual_mov_b32 v10, v12
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v10
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: wwm_in_shader:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: v_mov_b32_e32 v10, v12
; GISEL10-NEXT: v_mov_b32_e32 v11, v13
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: wwm_in_shader:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: v_mov_b32_e32 v11, v13
; DAGISEL10-NEXT: v_mov_b32_e32 v10, v12
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
; Branch on the i1 returned by init.whole.wave: true goes to %shader, false
; skips directly to %tail.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Plain (non-WWM) computation on %x.
%nonwwm = add i32 %x, 42
; WWM sequence: set.inactive supplies 71 (0x47 in the expected asm) as the
; second operand, the result is compared against zero, balloted, and the
; ballot is passed through strict.wwm.
%full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
%non.zero = icmp ne i32 %full.vgpr, 0
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
br label %tail
tail:
; When arriving from %entry, %x and %y are forwarded as-is; otherwise the two
; values computed in %shader are used.
%full.nonwwm = phi i32 [%x, %entry], [%nonwwm, %shader]
%full.wwm = phi i32 [%y, %entry], [%wwm, %shader]
%vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %full.nonwwm, 2
%vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %full.wwm, 3
; Hand off to %callee through llvm.amdgcn.cs.chain, passing %exec, %sgpr and
; the updated struct.
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.2, i32 0)
unreachable
}
; The struct is fully assembled inside %shader (plain add in field 2,
; strict.wwm result in field 3) and %tail merges it with the untouched incoming
; %vgpr through a single phi over the whole aggregate.
define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: phi_whole_struct:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v12
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: phi_whole_struct:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: phi_whole_struct:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: phi_whole_struct:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
; Branch on the i1 returned by init.whole.wave: true goes to %shader, false
; skips directly to %tail.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Field 2 receives the plain (non-WWM) add result.
%nonwwm = add i32 %x, 42
%vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2
; Field 3 receives the result of the set.inactive / ballot / strict.wwm
; sequence.
%full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
%non.zero = icmp ne i32 %full.vgpr, 0
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
%vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3
br label %tail
tail:
; One phi over the entire struct: the original %vgpr when arriving from %entry,
; the struct built in %shader otherwise.
%vgpr.args = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader]
; Hand off to %callee through llvm.amdgcn.cs.chain, passing %exec, %sgpr and
; the merged struct.
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
unreachable
}
; Introduce more complex control flow - %shader contains a simple loop, and %tail contains an if.
; The struct is threaded through the loop and the if/else by phis, and
; strict.wwm results are written both in %shader and in %tail.else.
define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: control_flow:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: s_cbranch_execz .LBB3_4
; GISEL12-NEXT: ; %bb.1: ; %shader.preheader
; GISEL12-NEXT: v_add_nc_u32_e32 v1, -1, v12
; GISEL12-NEXT: s_mov_b32 s4, 0
; GISEL12-NEXT: .LBB3_2: ; %shader
; GISEL12-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s9
; GISEL12-NEXT: s_mov_b32 exec_lo, s8
; GISEL12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL12-NEXT: v_mov_b32_e32 v11, v0
; GISEL12-NEXT: s_or_b32 s4, vcc_lo, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
; GISEL12-NEXT: s_cbranch_execnz .LBB3_2
; GISEL12-NEXT: ; %bb.3: ; %tail.loopexit
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v1
; GISEL12-NEXT: .LBB3_4: ; %Flow1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL12-NEXT: s_mov_b32 s3, exec_lo
; GISEL12-NEXT: ; implicit-def: $vgpr8
; GISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_xor_b32 s3, exec_lo, s3
; GISEL12-NEXT: ; %bb.5: ; %tail.else
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: v_mov_b32_e32 v0, 15
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_mov_b32_e32 v8, v0
; GISEL12-NEXT: ; %bb.6: ; %Flow
; GISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3
; GISEL12-NEXT: ; %bb.7: ; %tail.then
; GISEL12-NEXT: s_mov_b32 s4, 44
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_mov_b32_e32 v8, s4
; GISEL12-NEXT: ; %bb.8: ; %tail.end
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: control_flow:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: s_cbranch_execz .LBB3_4
; DAGISEL12-NEXT: ; %bb.1: ; %shader.preheader
; DAGISEL12-NEXT: v_add_nc_u32_e32 v1, -1, v12
; DAGISEL12-NEXT: s_mov_b32 s4, 0
; DAGISEL12-NEXT: .LBB3_2: ; %shader
; DAGISEL12-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_add_nc_u32_e32 v1, 1, v1
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s8
; DAGISEL12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; DAGISEL12-NEXT: v_mov_b32_e32 v11, s9
; DAGISEL12-NEXT: s_or_b32 s4, vcc_lo, s4
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
; DAGISEL12-NEXT: s_cbranch_execnz .LBB3_2
; DAGISEL12-NEXT: ; %bb.3: ; %tail.loopexit
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; DAGISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v1
; DAGISEL12-NEXT: .LBB3_4: ; %Flow1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL12-NEXT: s_mov_b32 s3, exec_lo
; DAGISEL12-NEXT: ; implicit-def: $vgpr8
; DAGISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_xor_b32 s3, exec_lo, s3
; DAGISEL12-NEXT: ; %bb.5: ; %tail.else
; DAGISEL12-NEXT: s_mov_b32 s4, 15
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_mov_b32_e32 v8, s4
; DAGISEL12-NEXT: ; %bb.6: ; %Flow
; DAGISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3
; DAGISEL12-NEXT: ; %bb.7: ; %tail.then
; DAGISEL12-NEXT: v_mov_b32_e32 v8, 44
; DAGISEL12-NEXT: ; %bb.8: ; %tail.end
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: control_flow:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: s_cbranch_execz .LBB3_4
; GISEL10-NEXT: ; %bb.1: ; %shader.preheader
; GISEL10-NEXT: v_add_nc_u32_e32 v1, -1, v12
; GISEL10-NEXT: s_mov_b32 s4, 0
; GISEL10-NEXT: .LBB3_2: ; %shader
; GISEL10-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL10-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; GISEL10-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s9
; GISEL10-NEXT: s_mov_b32 exec_lo, s8
; GISEL10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: s_or_b32 s4, vcc_lo, s4
; GISEL10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GISEL10-NEXT: s_cbranch_execnz .LBB3_2
; GISEL10-NEXT: ; %bb.3: ; %tail.loopexit
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v1
; GISEL10-NEXT: .LBB3_4: ; %Flow1
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 s3, exec_lo
; GISEL10-NEXT: ; implicit-def: $vgpr8
; GISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13
; GISEL10-NEXT: s_xor_b32 s3, exec_lo, s3
; GISEL10-NEXT: ; %bb.5: ; %tail.else
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_mov_b32_e32 v0, 15
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_mov_b32_e32 v8, v0
; GISEL10-NEXT: ; %bb.6: ; %Flow
; GISEL10-NEXT: s_andn2_saveexec_b32 s3, s3
; GISEL10-NEXT: ; %bb.7: ; %tail.then
; GISEL10-NEXT: s_mov_b32 s4, 44
; GISEL10-NEXT: v_mov_b32_e32 v8, s4
; GISEL10-NEXT: ; %bb.8: ; %tail.end
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: control_flow:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: s_cbranch_execz .LBB3_4
; DAGISEL10-NEXT: ; %bb.1: ; %shader.preheader
; DAGISEL10-NEXT: v_add_nc_u32_e32 v1, -1, v12
; DAGISEL10-NEXT: s_mov_b32 s4, 0
; DAGISEL10-NEXT: .LBB3_2: ; %shader
; DAGISEL10-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL10-NEXT: v_add_nc_u32_e32 v1, 1, v1
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s8
; DAGISEL10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s9
; DAGISEL10-NEXT: s_or_b32 s4, vcc_lo, s4
; DAGISEL10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; DAGISEL10-NEXT: s_cbranch_execnz .LBB3_2
; DAGISEL10-NEXT: ; %bb.3: ; %tail.loopexit
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v1
; DAGISEL10-NEXT: .LBB3_4: ; %Flow1
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 s3, exec_lo
; DAGISEL10-NEXT: ; implicit-def: $vgpr8
; DAGISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13
; DAGISEL10-NEXT: s_xor_b32 s3, exec_lo, s3
; DAGISEL10-NEXT: ; %bb.5: ; %tail.else
; DAGISEL10-NEXT: s_mov_b32 s4, 15
; DAGISEL10-NEXT: v_mov_b32_e32 v8, s4
; DAGISEL10-NEXT: ; %bb.6: ; %Flow
; DAGISEL10-NEXT: s_andn2_saveexec_b32 s3, s3
; DAGISEL10-NEXT: ; %bb.7: ; %tail.then
; DAGISEL10-NEXT: v_mov_b32_e32 v8, 44
; DAGISEL10-NEXT: ; %bb.8: ; %tail.end
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
; Branch on the i1 returned by init.whole.wave: true enters the %shader loop,
; false skips directly to %tail.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Loop body: %i starts at %x and is incremented by 1 on each iteration.
%i = phi i32 [%x, %entry], [%i.inc, %shader]
; Field 2 receives the plain add result for the current %i.
%nonwwm = add i32 %i, 42
%vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2
; Field 3 receives the result of the set.inactive / ballot / strict.wwm
; sequence for the current %i.
%full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %i, i32 71)
%non.zero = icmp ne i32 %full.vgpr, 0
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
%vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3
%i.inc = add i32 %i, 1
; The exit test uses the pre-increment %i: keep looping while %i != %y.
%loop.cond = icmp ne i32 %i, %y
br i1 %loop.cond, label %shader, label %tail
tail:
; The original %vgpr when arriving from %entry, otherwise the struct from the
; last loop iteration.
%vgpr.tail = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader]
; Signed comparison: %x >= %y selects %tail.then, otherwise %tail.else.
%if.cond = icmp sge i32 %x, %y
br i1 %if.cond, label %tail.then, label %tail.else
tail.then:
; Field 0 is overwritten with the constant 44.
%vgpr.then = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.tail, i32 44, 0
br label %tail.end
tail.else:
; Field 0 is overwritten with the constant 15 passed through strict.wwm.
%wwm.tail = call i32 @llvm.amdgcn.strict.wwm.i32(i32 15)
%vgpr.else = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.tail, i32 %wwm.tail, 0
br label %tail.end
tail.end:
%vgpr.args = phi { i32, ptr addrspace(5), i32, i32 } [%vgpr.then, %tail.then], [%vgpr.else, %tail.else]
; Hand off to %callee through llvm.amdgcn.cs.chain, passing %exec, %sgpr and
; the merged struct.
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
unreachable
}
; Try with v0-v7 occupied - this will force us to use higher registers for temporaries. Make sure we don't preserve them.
; The occupation of v0-v7 is expressed by an inline asm in %shader whose
; constraint list clobbers v0 through v7; the rest of %shader builds the struct
; with a plain add in field 2 and a strict.wwm result in field 3.
define amdgpu_cs_chain void @use_v0_7(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: use_v0_7:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: s_cbranch_execz .LBB4_2
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; GISEL12-NEXT: v_mov_b32_e32 v13, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_dual_mov_b32 v11, v13 :: v_dual_add_nc_u32 v10, 42, v12
; GISEL12-NEXT: ;;#ASMSTART
; GISEL12-NEXT: ; use v0-7
; GISEL12-NEXT: ;;#ASMEND
; GISEL12-NEXT: .LBB4_2: ; %tail
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: use_v0_7:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: s_cbranch_execz .LBB4_2
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12
; DAGISEL12-NEXT: ;;#ASMSTART
; DAGISEL12-NEXT: ; use v0-7
; DAGISEL12-NEXT: ;;#ASMEND
; DAGISEL12-NEXT: .LBB4_2: ; %tail
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: use_v0_7:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: s_cbranch_execz .LBB4_2
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; GISEL10-NEXT: v_mov_b32_e32 v13, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; GISEL10-NEXT: v_mov_b32_e32 v11, v13
; GISEL10-NEXT: ;;#ASMSTART
; GISEL10-NEXT: ; use v0-7
; GISEL10-NEXT: ;;#ASMEND
; GISEL10-NEXT: .LBB4_2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: use_v0_7:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: s_cbranch_execz .LBB4_2
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
; DAGISEL10-NEXT: ;;#ASMSTART
; DAGISEL10-NEXT: ; use v0-7
; DAGISEL10-NEXT: ;;#ASMEND
; DAGISEL10-NEXT: .LBB4_2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
; Branch on the i1 returned by init.whole.wave: true goes to %shader, false
; skips directly to %tail.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Side-effecting inline asm that emits only a comment but lists v0..v7 as
; clobbered.
call void asm sideeffect "; use v0-7", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"()
; Field 2 receives the plain (non-WWM) add result.
%nonwwm = add i32 %x, 42
%vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2
; Field 3 receives the result of the set.inactive / ballot / strict.wwm
; sequence; the expected asm above uses v13 as the temporary for it.
%full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
%non.zero = icmp ne i32 %full.vgpr, 0
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
%vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3
br label %tail
tail:
; The original %vgpr when arriving from %entry, the struct built in %shader
; otherwise.
%vgpr.args = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader]
; Hand off to %callee through llvm.amdgcn.cs.chain, passing %exec, %sgpr and
; the merged struct.
call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
unreachable
}
; Check that the inactive lanes of v8:15 are correctly preserved even across a
; WWM call that reads and writes them.
; FIXME: The GlobalISel path hits a pre-existing issue, so the inactive lanes do get overwritten.
; Shape of the test: the i1 returned by llvm.amdgcn.init.whole.wave gates the
; %shader block, which calls @write_v0_v15 on %vgpr and wraps the result in
; llvm.amdgcn.strict.wwm. %tail merges the original and WWM values with a phi
; and forwards them through llvm.amdgcn.cs.chain. %x and %y are not referenced
; by the body.
define amdgpu_cs_chain void @wwm_write_to_arg_reg(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, <16 x i32> %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: wwm_write_to_arg_reg:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_mov_b32 s32, 0
; GISEL12-NEXT: s_or_saveexec_b32 s9, -1
; GISEL12-NEXT: s_or_saveexec_b32 s12, -1
; GISEL12-NEXT: s_mov_b32 s6, s0
; GISEL12-NEXT: s_mov_b32 s7, s1
; GISEL12-NEXT: s_mov_b32 s8, s2
; GISEL12-NEXT: s_mov_b32 s10, s3
; GISEL12-NEXT: s_mov_b32 s11, s4
; GISEL12-NEXT: v_dual_mov_b32 v24, v8 :: v_dual_mov_b32 v25, v9
; GISEL12-NEXT: v_dual_mov_b32 v26, v10 :: v_dual_mov_b32 v27, v11
; GISEL12-NEXT: v_dual_mov_b32 v28, v12 :: v_dual_mov_b32 v29, v13
; GISEL12-NEXT: v_dual_mov_b32 v30, v14 :: v_dual_mov_b32 v31, v15
; GISEL12-NEXT: v_dual_mov_b32 v32, v16 :: v_dual_mov_b32 v33, v17
; GISEL12-NEXT: v_dual_mov_b32 v34, v18 :: v_dual_mov_b32 v35, v19
; GISEL12-NEXT: v_dual_mov_b32 v36, v20 :: v_dual_mov_b32 v37, v21
; GISEL12-NEXT: v_dual_mov_b32 v38, v22 :: v_dual_mov_b32 v39, v23
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_mov_b32 exec_lo, s12
; GISEL12-NEXT: s_and_saveexec_b32 s4, s9
; GISEL12-NEXT: s_cbranch_execz .LBB5_2
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s9, -1
; GISEL12-NEXT: s_getpc_b64 s[0:1]
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_sext_i32_i16 s1, s1
; GISEL12-NEXT: s_add_co_u32 s0, s0, write_v0_v15@gotpcrel32@lo+12
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_add_co_ci_u32 s1, s1, write_v0_v15@gotpcrel32@hi+24
; GISEL12-NEXT: v_dual_mov_b32 v0, v24 :: v_dual_mov_b32 v1, v25
; GISEL12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GISEL12-NEXT: v_dual_mov_b32 v2, v26 :: v_dual_mov_b32 v3, v27
; GISEL12-NEXT: v_dual_mov_b32 v4, v28 :: v_dual_mov_b32 v5, v29
; GISEL12-NEXT: v_dual_mov_b32 v6, v30 :: v_dual_mov_b32 v7, v31
; GISEL12-NEXT: v_dual_mov_b32 v8, v32 :: v_dual_mov_b32 v9, v33
; GISEL12-NEXT: v_dual_mov_b32 v10, v34 :: v_dual_mov_b32 v11, v35
; GISEL12-NEXT: v_dual_mov_b32 v12, v36 :: v_dual_mov_b32 v13, v37
; GISEL12-NEXT: v_dual_mov_b32 v14, v38 :: v_dual_mov_b32 v15, v39
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL12-NEXT: v_dual_mov_b32 v24, v0 :: v_dual_mov_b32 v25, v1
; GISEL12-NEXT: v_dual_mov_b32 v26, v2 :: v_dual_mov_b32 v27, v3
; GISEL12-NEXT: v_dual_mov_b32 v28, v4 :: v_dual_mov_b32 v29, v5
; GISEL12-NEXT: v_dual_mov_b32 v30, v6 :: v_dual_mov_b32 v31, v7
; GISEL12-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9
; GISEL12-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11
; GISEL12-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13
; GISEL12-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15
; GISEL12-NEXT: s_mov_b32 exec_lo, s9
; GISEL12-NEXT: ; kill: def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $exec
; GISEL12-NEXT: .LBB5_2: ; %tail
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GISEL12-NEXT: v_dual_mov_b32 v8, v24 :: v_dual_mov_b32 v9, v25
; GISEL12-NEXT: v_dual_mov_b32 v10, v26 :: v_dual_mov_b32 v11, v27
; GISEL12-NEXT: v_dual_mov_b32 v12, v28 :: v_dual_mov_b32 v13, v29
; GISEL12-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v15, v31
; GISEL12-NEXT: v_dual_mov_b32 v16, v32 :: v_dual_mov_b32 v17, v33
; GISEL12-NEXT: v_dual_mov_b32 v18, v34 :: v_dual_mov_b32 v19, v35
; GISEL12-NEXT: v_dual_mov_b32 v20, v36 :: v_dual_mov_b32 v21, v37
; GISEL12-NEXT: v_dual_mov_b32 v22, v38 :: v_dual_mov_b32 v23, v39
; GISEL12-NEXT: s_mov_b32 s0, s6
; GISEL12-NEXT: s_mov_b32 s1, s7
; GISEL12-NEXT: s_mov_b32 s2, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[10:11]
;
; DAGISEL12-LABEL: wwm_write_to_arg_reg:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_mov_b32 s32, 0
; DAGISEL12-NEXT: s_or_saveexec_b32 s11, -1
; DAGISEL12-NEXT: s_or_saveexec_b32 s6, -1
; DAGISEL12-NEXT: v_dual_mov_b32 v39, v23 :: v_dual_mov_b32 v38, v22
; DAGISEL12-NEXT: v_dual_mov_b32 v37, v21 :: v_dual_mov_b32 v36, v20
; DAGISEL12-NEXT: v_dual_mov_b32 v35, v19 :: v_dual_mov_b32 v34, v18
; DAGISEL12-NEXT: v_dual_mov_b32 v33, v17 :: v_dual_mov_b32 v32, v16
; DAGISEL12-NEXT: v_dual_mov_b32 v31, v15 :: v_dual_mov_b32 v30, v14
; DAGISEL12-NEXT: v_dual_mov_b32 v29, v13 :: v_dual_mov_b32 v28, v12
; DAGISEL12-NEXT: v_dual_mov_b32 v27, v11 :: v_dual_mov_b32 v26, v10
; DAGISEL12-NEXT: v_dual_mov_b32 v25, v9 :: v_dual_mov_b32 v24, v8
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s6
; DAGISEL12-NEXT: s_mov_b32 s9, s4
; DAGISEL12-NEXT: s_mov_b32 s8, s3
; DAGISEL12-NEXT: s_mov_b32 s4, s2
; DAGISEL12-NEXT: s_mov_b32 s6, s1
; DAGISEL12-NEXT: s_mov_b32 s7, s0
; DAGISEL12-NEXT: s_and_saveexec_b32 s10, s11
; DAGISEL12-NEXT: s_cbranch_execz .LBB5_2
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s11, -1
; DAGISEL12-NEXT: s_getpc_b64 s[0:1]
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_sext_i32_i16 s1, s1
; DAGISEL12-NEXT: s_add_co_u32 s0, s0, write_v0_v15@gotpcrel32@lo+12
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_add_co_ci_u32 s1, s1, write_v0_v15@gotpcrel32@hi+24
; DAGISEL12-NEXT: v_dual_mov_b32 v0, v24 :: v_dual_mov_b32 v1, v25
; DAGISEL12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; DAGISEL12-NEXT: v_dual_mov_b32 v2, v26 :: v_dual_mov_b32 v3, v27
; DAGISEL12-NEXT: v_dual_mov_b32 v4, v28 :: v_dual_mov_b32 v5, v29
; DAGISEL12-NEXT: v_dual_mov_b32 v6, v30 :: v_dual_mov_b32 v7, v31
; DAGISEL12-NEXT: v_dual_mov_b32 v8, v32 :: v_dual_mov_b32 v9, v33
; DAGISEL12-NEXT: v_dual_mov_b32 v10, v34 :: v_dual_mov_b32 v11, v35
; DAGISEL12-NEXT: v_dual_mov_b32 v12, v36 :: v_dual_mov_b32 v13, v37
; DAGISEL12-NEXT: v_dual_mov_b32 v14, v38 :: v_dual_mov_b32 v15, v39
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL12-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v41, v1
; DAGISEL12-NEXT: v_dual_mov_b32 v42, v2 :: v_dual_mov_b32 v43, v3
; DAGISEL12-NEXT: v_dual_mov_b32 v44, v4 :: v_dual_mov_b32 v45, v5
; DAGISEL12-NEXT: v_dual_mov_b32 v46, v6 :: v_dual_mov_b32 v47, v7
; DAGISEL12-NEXT: v_dual_mov_b32 v48, v8 :: v_dual_mov_b32 v49, v9
; DAGISEL12-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v11
; DAGISEL12-NEXT: v_dual_mov_b32 v52, v12 :: v_dual_mov_b32 v53, v13
; DAGISEL12-NEXT: v_dual_mov_b32 v54, v14 :: v_dual_mov_b32 v55, v15
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s11
; DAGISEL12-NEXT: v_dual_mov_b32 v24, v40 :: v_dual_mov_b32 v25, v41
; DAGISEL12-NEXT: v_dual_mov_b32 v26, v42 :: v_dual_mov_b32 v27, v43
; DAGISEL12-NEXT: v_dual_mov_b32 v28, v44 :: v_dual_mov_b32 v29, v45
; DAGISEL12-NEXT: v_dual_mov_b32 v30, v46 :: v_dual_mov_b32 v31, v47
; DAGISEL12-NEXT: v_dual_mov_b32 v32, v48 :: v_dual_mov_b32 v33, v49
; DAGISEL12-NEXT: v_dual_mov_b32 v34, v50 :: v_dual_mov_b32 v35, v51
; DAGISEL12-NEXT: v_dual_mov_b32 v36, v52 :: v_dual_mov_b32 v37, v53
; DAGISEL12-NEXT: v_dual_mov_b32 v38, v54 :: v_dual_mov_b32 v39, v55
; DAGISEL12-NEXT: .LBB5_2: ; %tail
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s10
; DAGISEL12-NEXT: v_dual_mov_b32 v8, v24 :: v_dual_mov_b32 v9, v25
; DAGISEL12-NEXT: v_dual_mov_b32 v10, v26 :: v_dual_mov_b32 v11, v27
; DAGISEL12-NEXT: v_dual_mov_b32 v12, v28 :: v_dual_mov_b32 v13, v29
; DAGISEL12-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v15, v31
; DAGISEL12-NEXT: v_dual_mov_b32 v16, v32 :: v_dual_mov_b32 v17, v33
; DAGISEL12-NEXT: v_dual_mov_b32 v18, v34 :: v_dual_mov_b32 v19, v35
; DAGISEL12-NEXT: v_dual_mov_b32 v20, v36 :: v_dual_mov_b32 v21, v37
; DAGISEL12-NEXT: v_dual_mov_b32 v22, v38 :: v_dual_mov_b32 v23, v39
; DAGISEL12-NEXT: s_mov_b32 s0, s7
; DAGISEL12-NEXT: s_mov_b32 s1, s6
; DAGISEL12-NEXT: s_mov_b32 s2, s4
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[8:9]
;
; GISEL10-LABEL: wwm_write_to_arg_reg:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_mov_b32 s32, 0
; GISEL10-NEXT: s_or_saveexec_b32 s9, -1
; GISEL10-NEXT: s_or_saveexec_b32 s12, -1
; GISEL10-NEXT: s_mov_b32 s6, s0
; GISEL10-NEXT: s_mov_b32 s7, s1
; GISEL10-NEXT: s_mov_b32 s8, s2
; GISEL10-NEXT: s_mov_b32 s10, s3
; GISEL10-NEXT: s_mov_b32 s11, s4
; GISEL10-NEXT: v_mov_b32_e32 v24, v8
; GISEL10-NEXT: v_mov_b32_e32 v25, v9
; GISEL10-NEXT: v_mov_b32_e32 v26, v10
; GISEL10-NEXT: v_mov_b32_e32 v27, v11
; GISEL10-NEXT: v_mov_b32_e32 v28, v12
; GISEL10-NEXT: v_mov_b32_e32 v29, v13
; GISEL10-NEXT: v_mov_b32_e32 v30, v14
; GISEL10-NEXT: v_mov_b32_e32 v31, v15
; GISEL10-NEXT: v_mov_b32_e32 v32, v16
; GISEL10-NEXT: v_mov_b32_e32 v33, v17
; GISEL10-NEXT: v_mov_b32_e32 v34, v18
; GISEL10-NEXT: v_mov_b32_e32 v35, v19
; GISEL10-NEXT: v_mov_b32_e32 v36, v20
; GISEL10-NEXT: v_mov_b32_e32 v37, v21
; GISEL10-NEXT: v_mov_b32_e32 v38, v22
; GISEL10-NEXT: v_mov_b32_e32 v39, v23
; GISEL10-NEXT: s_mov_b32 exec_lo, s12
; GISEL10-NEXT: s_and_saveexec_b32 s4, s9
; GISEL10-NEXT: s_cbranch_execz .LBB5_2
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s9, -1
; GISEL10-NEXT: s_getpc_b64 s[0:1]
; GISEL10-NEXT: s_add_u32 s0, s0, write_v0_v15@gotpcrel32@lo+4
; GISEL10-NEXT: s_addc_u32 s1, s1, write_v0_v15@gotpcrel32@hi+12
; GISEL10-NEXT: v_mov_b32_e32 v0, v24
; GISEL10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
; GISEL10-NEXT: v_mov_b32_e32 v1, v25
; GISEL10-NEXT: v_mov_b32_e32 v2, v26
; GISEL10-NEXT: v_mov_b32_e32 v3, v27
; GISEL10-NEXT: v_mov_b32_e32 v4, v28
; GISEL10-NEXT: v_mov_b32_e32 v5, v29
; GISEL10-NEXT: v_mov_b32_e32 v6, v30
; GISEL10-NEXT: v_mov_b32_e32 v7, v31
; GISEL10-NEXT: v_mov_b32_e32 v8, v32
; GISEL10-NEXT: v_mov_b32_e32 v9, v33
; GISEL10-NEXT: v_mov_b32_e32 v10, v34
; GISEL10-NEXT: v_mov_b32_e32 v11, v35
; GISEL10-NEXT: v_mov_b32_e32 v12, v36
; GISEL10-NEXT: v_mov_b32_e32 v13, v37
; GISEL10-NEXT: v_mov_b32_e32 v14, v38
; GISEL10-NEXT: v_mov_b32_e32 v15, v39
; GISEL10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL10-NEXT: s_waitcnt lgkmcnt(0)
; GISEL10-NEXT: s_swappc_b64 s[30:31], s[12:13]
; GISEL10-NEXT: v_mov_b32_e32 v24, v0
; GISEL10-NEXT: v_mov_b32_e32 v25, v1
; GISEL10-NEXT: v_mov_b32_e32 v26, v2
; GISEL10-NEXT: v_mov_b32_e32 v27, v3
; GISEL10-NEXT: v_mov_b32_e32 v28, v4
; GISEL10-NEXT: v_mov_b32_e32 v29, v5
; GISEL10-NEXT: v_mov_b32_e32 v30, v6
; GISEL10-NEXT: v_mov_b32_e32 v31, v7
; GISEL10-NEXT: v_mov_b32_e32 v32, v8
; GISEL10-NEXT: v_mov_b32_e32 v33, v9
; GISEL10-NEXT: v_mov_b32_e32 v34, v10
; GISEL10-NEXT: v_mov_b32_e32 v35, v11
; GISEL10-NEXT: v_mov_b32_e32 v36, v12
; GISEL10-NEXT: v_mov_b32_e32 v37, v13
; GISEL10-NEXT: v_mov_b32_e32 v38, v14
; GISEL10-NEXT: v_mov_b32_e32 v39, v15
; GISEL10-NEXT: s_mov_b32 exec_lo, s9
; GISEL10-NEXT: ; kill: def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $exec
; GISEL10-NEXT: .LBB5_2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GISEL10-NEXT: v_mov_b32_e32 v8, v24
; GISEL10-NEXT: v_mov_b32_e32 v9, v25
; GISEL10-NEXT: v_mov_b32_e32 v10, v26
; GISEL10-NEXT: v_mov_b32_e32 v11, v27
; GISEL10-NEXT: v_mov_b32_e32 v12, v28
; GISEL10-NEXT: v_mov_b32_e32 v13, v29
; GISEL10-NEXT: v_mov_b32_e32 v14, v30
; GISEL10-NEXT: v_mov_b32_e32 v15, v31
; GISEL10-NEXT: v_mov_b32_e32 v16, v32
; GISEL10-NEXT: v_mov_b32_e32 v17, v33
; GISEL10-NEXT: v_mov_b32_e32 v18, v34
; GISEL10-NEXT: v_mov_b32_e32 v19, v35
; GISEL10-NEXT: v_mov_b32_e32 v20, v36
; GISEL10-NEXT: v_mov_b32_e32 v21, v37
; GISEL10-NEXT: v_mov_b32_e32 v22, v38
; GISEL10-NEXT: v_mov_b32_e32 v23, v39
; GISEL10-NEXT: s_mov_b32 s0, s6
; GISEL10-NEXT: s_mov_b32 s1, s7
; GISEL10-NEXT: s_mov_b32 s2, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[10:11]
;
; DAGISEL10-LABEL: wwm_write_to_arg_reg:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_mov_b32 s32, 0
; DAGISEL10-NEXT: s_or_saveexec_b32 s11, -1
; DAGISEL10-NEXT: s_or_saveexec_b32 s6, -1
; DAGISEL10-NEXT: v_mov_b32_e32 v39, v23
; DAGISEL10-NEXT: v_mov_b32_e32 v38, v22
; DAGISEL10-NEXT: v_mov_b32_e32 v37, v21
; DAGISEL10-NEXT: v_mov_b32_e32 v36, v20
; DAGISEL10-NEXT: v_mov_b32_e32 v35, v19
; DAGISEL10-NEXT: v_mov_b32_e32 v34, v18
; DAGISEL10-NEXT: v_mov_b32_e32 v33, v17
; DAGISEL10-NEXT: v_mov_b32_e32 v32, v16
; DAGISEL10-NEXT: v_mov_b32_e32 v31, v15
; DAGISEL10-NEXT: v_mov_b32_e32 v30, v14
; DAGISEL10-NEXT: v_mov_b32_e32 v29, v13
; DAGISEL10-NEXT: v_mov_b32_e32 v28, v12
; DAGISEL10-NEXT: v_mov_b32_e32 v27, v11
; DAGISEL10-NEXT: v_mov_b32_e32 v26, v10
; DAGISEL10-NEXT: v_mov_b32_e32 v25, v9
; DAGISEL10-NEXT: v_mov_b32_e32 v24, v8
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s6
; DAGISEL10-NEXT: s_mov_b32 s9, s4
; DAGISEL10-NEXT: s_mov_b32 s8, s3
; DAGISEL10-NEXT: s_mov_b32 s4, s2
; DAGISEL10-NEXT: s_mov_b32 s6, s1
; DAGISEL10-NEXT: s_mov_b32 s7, s0
; DAGISEL10-NEXT: s_and_saveexec_b32 s10, s11
; DAGISEL10-NEXT: s_cbranch_execz .LBB5_2
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s11, -1
; DAGISEL10-NEXT: s_getpc_b64 s[0:1]
; DAGISEL10-NEXT: s_add_u32 s0, s0, write_v0_v15@gotpcrel32@lo+4
; DAGISEL10-NEXT: s_addc_u32 s1, s1, write_v0_v15@gotpcrel32@hi+12
; DAGISEL10-NEXT: v_mov_b32_e32 v0, v24
; DAGISEL10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
; DAGISEL10-NEXT: v_mov_b32_e32 v1, v25
; DAGISEL10-NEXT: v_mov_b32_e32 v2, v26
; DAGISEL10-NEXT: v_mov_b32_e32 v3, v27
; DAGISEL10-NEXT: v_mov_b32_e32 v4, v28
; DAGISEL10-NEXT: v_mov_b32_e32 v5, v29
; DAGISEL10-NEXT: v_mov_b32_e32 v6, v30
; DAGISEL10-NEXT: v_mov_b32_e32 v7, v31
; DAGISEL10-NEXT: v_mov_b32_e32 v8, v32
; DAGISEL10-NEXT: v_mov_b32_e32 v9, v33
; DAGISEL10-NEXT: v_mov_b32_e32 v10, v34
; DAGISEL10-NEXT: v_mov_b32_e32 v11, v35
; DAGISEL10-NEXT: v_mov_b32_e32 v12, v36
; DAGISEL10-NEXT: v_mov_b32_e32 v13, v37
; DAGISEL10-NEXT: v_mov_b32_e32 v14, v38
; DAGISEL10-NEXT: v_mov_b32_e32 v15, v39
; DAGISEL10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL10-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL10-NEXT: s_swappc_b64 s[30:31], s[12:13]
; DAGISEL10-NEXT: v_mov_b32_e32 v40, v0
; DAGISEL10-NEXT: v_mov_b32_e32 v41, v1
; DAGISEL10-NEXT: v_mov_b32_e32 v42, v2
; DAGISEL10-NEXT: v_mov_b32_e32 v43, v3
; DAGISEL10-NEXT: v_mov_b32_e32 v44, v4
; DAGISEL10-NEXT: v_mov_b32_e32 v45, v5
; DAGISEL10-NEXT: v_mov_b32_e32 v46, v6
; DAGISEL10-NEXT: v_mov_b32_e32 v47, v7
; DAGISEL10-NEXT: v_mov_b32_e32 v48, v8
; DAGISEL10-NEXT: v_mov_b32_e32 v49, v9
; DAGISEL10-NEXT: v_mov_b32_e32 v50, v10
; DAGISEL10-NEXT: v_mov_b32_e32 v51, v11
; DAGISEL10-NEXT: v_mov_b32_e32 v52, v12
; DAGISEL10-NEXT: v_mov_b32_e32 v53, v13
; DAGISEL10-NEXT: v_mov_b32_e32 v54, v14
; DAGISEL10-NEXT: v_mov_b32_e32 v55, v15
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s11
; DAGISEL10-NEXT: v_mov_b32_e32 v24, v40
; DAGISEL10-NEXT: v_mov_b32_e32 v25, v41
; DAGISEL10-NEXT: v_mov_b32_e32 v26, v42
; DAGISEL10-NEXT: v_mov_b32_e32 v27, v43
; DAGISEL10-NEXT: v_mov_b32_e32 v28, v44
; DAGISEL10-NEXT: v_mov_b32_e32 v29, v45
; DAGISEL10-NEXT: v_mov_b32_e32 v30, v46
; DAGISEL10-NEXT: v_mov_b32_e32 v31, v47
; DAGISEL10-NEXT: v_mov_b32_e32 v32, v48
; DAGISEL10-NEXT: v_mov_b32_e32 v33, v49
; DAGISEL10-NEXT: v_mov_b32_e32 v34, v50
; DAGISEL10-NEXT: v_mov_b32_e32 v35, v51
; DAGISEL10-NEXT: v_mov_b32_e32 v36, v52
; DAGISEL10-NEXT: v_mov_b32_e32 v37, v53
; DAGISEL10-NEXT: v_mov_b32_e32 v38, v54
; DAGISEL10-NEXT: v_mov_b32_e32 v39, v55
; DAGISEL10-NEXT: .LBB5_2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s10
; DAGISEL10-NEXT: v_mov_b32_e32 v8, v24
; DAGISEL10-NEXT: v_mov_b32_e32 v9, v25
; DAGISEL10-NEXT: v_mov_b32_e32 v10, v26
; DAGISEL10-NEXT: v_mov_b32_e32 v11, v27
; DAGISEL10-NEXT: v_mov_b32_e32 v12, v28
; DAGISEL10-NEXT: v_mov_b32_e32 v13, v29
; DAGISEL10-NEXT: v_mov_b32_e32 v14, v30
; DAGISEL10-NEXT: v_mov_b32_e32 v15, v31
; DAGISEL10-NEXT: v_mov_b32_e32 v16, v32
; DAGISEL10-NEXT: v_mov_b32_e32 v17, v33
; DAGISEL10-NEXT: v_mov_b32_e32 v18, v34
; DAGISEL10-NEXT: v_mov_b32_e32 v19, v35
; DAGISEL10-NEXT: v_mov_b32_e32 v20, v36
; DAGISEL10-NEXT: v_mov_b32_e32 v21, v37
; DAGISEL10-NEXT: v_mov_b32_e32 v22, v38
; DAGISEL10-NEXT: v_mov_b32_e32 v23, v39
; DAGISEL10-NEXT: s_mov_b32 s0, s7
; DAGISEL10-NEXT: s_mov_b32 s1, s6
; DAGISEL10-NEXT: s_mov_b32 s2, s4
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[8:9]
entry:
; Branch on the i1 produced by init.whole.wave: true enters %shader, false
; goes straight to %tail.
%entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
br i1 %entry_exec, label %shader, label %tail
shader:
; Pass the incoming VGPR arguments to the amdgpu_gfx helper, then mark the
; returned vector as a strict WWM value.
%v0.15 = call amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32> %vgpr)
%vgpr.wwm = call <16 x i32> @llvm.amdgcn.strict.wwm.v16i32(<16 x i32> %v0.15)
br label %tail
tail:
; The phi takes the untouched %vgpr on the edge from %entry and the WWM result
; on the edge from %shader.
%vgpr.args = phi <16 x i32> [%vgpr, %entry], [%vgpr.wwm, %shader]
; Chain to %callee with %exec, the inreg %sgpr vector and the merged VGPR
; values; the trailing constant operand is i32 0.
call void(ptr, i32, <3 x i32>, <16 x i32>, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, <16 x i32> %vgpr.args, i32 0)
unreachable
}
; External helper called from @wwm_write_to_arg_reg. It uses the amdgpu_gfx
; calling convention and both takes and returns a <16 x i32>; the generated
; code in that test moves the argument into v0-v15 before the call and reads
; the result back from v0-v15 afterwards.
declare amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32>)