; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
; readfirstlane applied to an i1 function argument; the result is stored to
; %out. Both selectors are expected to read the argument from v2 with
; v_readfirstlane_b32, mask the scalar result with 1, and emit a byte store.
define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
store i1 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; Same as the plain i1 test, but %src carries the inreg attribute. The expected
; output for both selectors contains no v_readfirstlane_b32: the incoming value
; in s6 is only masked with 1 and then stored as a byte.
define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: s_and_b32 s4, s6, 1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: s_and_b32 s4, s6, 1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
store i1 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to the i1 result of an unsigned compare (%src > 42);
; the result then drives a select between %src and %src1.
; In the expected output the compare is materialised as 0/1 in v4, read with
; v_readfirstlane_b32, and turned back into a vcc condition. The selectors
; differ in how they rebuild vcc: SelectionDAG uses s_bitcmp1_b32 plus
; s_cselect_b64, GlobalISel uses s_and_b32 plus v_cmp_ne_u32.
define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
; CHECK-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v4
; CHECK-SDAG-NEXT: s_bitcmp1_b32 s4, 0
; CHECK-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
; CHECK-SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
; CHECK-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v4
; CHECK-GISEL-NEXT: s_and_b32 s4, 1, s4
; CHECK-GISEL-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
; CHECK-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp ugt i32 %src, 42
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp)
%sel = select i1 %readfirstlane, i32 %src, i32 %src1
store i32 %sel, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to an i1 loaded from %in; the result is stored to %out.
; Both selectors are expected to load an unsigned byte, wait for it, read it
; with v_readfirstlane_b32, mask the result with 1, and emit a byte store.
define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1_load:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: flat_load_ubyte v2, v[2:3]
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1_load:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: flat_load_ubyte v2, v[2:3]
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%load = load i1, ptr addrspace(1) %in
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load)
store i1 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; Baseline 32-bit case: readfirstlane on an i32 argument, result stored to %out.
; Both selectors are expected to emit exactly one v_readfirstlane_b32 (from v2),
; copy the scalar back to a VGPR, and store a dword.
define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; 64-bit integer case: readfirstlane on an i64 argument, result stored to %out.
; The expected output uses two v_readfirstlane_b32 instructions, one per 32-bit
; half (v2 and v3). The selectors differ only in the order of the two reads and
; in which SGPR receives which half.
define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %src)
store i64 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; 64-bit floating-point case: readfirstlane on a double argument, result stored
; to %out. The expected output matches the i64 test: two v_readfirstlane_b32
; instructions (one per 32-bit half), with the selectors differing only in read
; order and SGPR assignment.
define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %src)
store double %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane of the i32 constant 32, consumed by inline asm through an "s"
; constraint. The expected output for both selectors has no
; v_readfirstlane_b32: the constant is moved straight into s0 with s_mov_b32.
define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b32 s0, 32
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_mov_b32 s0, 32
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
ret void
}
; readfirstlane of the i64 constant 32, consumed by inline asm through an "s"
; constraint. The expected output for both selectors has no
; v_readfirstlane_b32: the constant is moved into s[0:1] with one s_mov_b64.
define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[0:1]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 32
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[0:1]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
%readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
call void asm sideeffect "; use $0", "s"(i64 %readfirstlane)
ret void
}
; readfirstlane of the double constant 32.0, consumed by inline asm through an
; "s" constraint. The expected output for both selectors has no
; v_readfirstlane_b32: the constant is built in s[0:1] with two s_mov_b32
; instructions (low word 0, high word 0x40400000).
define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b32 s0, 0
; CHECK-SDAG-NEXT: s_mov_b32 s1, 0x40400000
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[0:1]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_mov_b32 s0, 0
; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[0:1]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
%readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
call void asm sideeffect "; use $0", "s"(double %readfirstlane)
ret void
}
; readfirstlane of the i32 constant 32 whose result is stored to %out rather
; than consumed by inline asm. The expected output for both selectors has no
; v_readfirstlane_b32: the constant is written directly into the store's data
; VGPR with v_mov_b32.
define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, 32
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, 32
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_endpgm
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane of the i64 constant 32 whose result is stored to %out. Neither
; selector is expected to emit v_readfirstlane_b32, but they differ in how the
; constant reaches the data VGPRs: SelectionDAG writes 32 and 0 directly with
; v_mov_b32, while GlobalISel first builds it in s[2:3] with s_mov_b64 and then
; copies both halves into VGPRs.
define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 32
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
%readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
store i64 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane of the double constant 32.0 whose result is stored to %out.
; Neither selector is expected to emit v_readfirstlane_b32. SelectionDAG writes
; the two halves (0 and 0x40400000) directly into VGPRs; GlobalISel first builds
; them in s2/s3 with s_mov_b32 and then copies them into VGPRs.
define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0x40400000
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
%readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
store double %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to a value that inline asm produces in m0 (output
; constraint "={m0}"); the result is stored to %out. The expected output for
; both selectors has no v_readfirstlane_b32: m0 is copied straight into the
; store's data VGPR with v_mov_b32.
; The call uses the type-suffixed intrinsic name (.i32), matching the other
; i32 calls in this file; the operand and result types are unchanged.
define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_m0:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b32 m0, -1
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, m0
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_m0:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b32 m0, -1
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, m0
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_endpgm
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %m0)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to an i32 that inline asm produces through an "=s"
; output constraint; the result is stored to %out. The expected output for both
; selectors has no v_readfirstlane_b32: the asm result in s2 is copied straight
; into the store's data VGPR with v_mov_b32.
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b32 s2, 0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s2
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_endpgm
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %sgpr)
store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to an i64 that inline asm produces through an "=s"
; output constraint; the result is stored to %out. The expected output for both
; selectors has no v_readfirstlane_b32: the asm result in s[2:3] is copied
; into the store's data VGPRs with two v_mov_b32 instructions.
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
%sgpr = call i64 asm "s_mov_b64 $0, 0", "=s"()
%readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %sgpr)
store i64 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to a double that inline asm produces through an "=s"
; output constraint; the result is stored to %out. The expected output for both
; selectors has no v_readfirstlane_b32: the asm result in s[2:3] is copied
; into the store's data VGPRs with two v_mov_b32 instructions.
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
%sgpr = call double asm "s_mov_b64 $0, 0", "=s"()
%readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %sgpr)
store double %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
; readfirstlane applied to the integer value of a stack slot address (an
; addrspace(5) alloca converted with ptrtoint), consumed by inline asm through
; an "s" constraint. The expected output for both selectors has no
; v_readfirstlane_b32: the value is materialised as the constant 0 in s4 with
; s_mov_b32.
; The call uses the type-suffixed intrinsic name (.i32), matching the other
; i32 calls in this file; the operand and result types are unchanged.
define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_fi:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s15
; CHECK-SDAG-NEXT: s_addc_u32 s1, s1, 0
; CHECK-SDAG-NEXT: s_mov_b32 s4, 0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_fi:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_add_u32 s0, s0, s15
; CHECK-GISEL-NEXT: s_addc_u32 s1, s1, 0
; CHECK-GISEL-NEXT: s_mov_b32 s4, 0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
%alloca = alloca i32, addrspace(5)
%int = ptrtoint ptr addrspace(5) %alloca to i32
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %int)
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
ret void
}
; readfirstlane on a half argument, consumed by inline asm through an "s"
; constraint. Both selectors are expected to emit a single v_readfirstlane_b32
; from v2 into s4, with no extra masking before the asm use.
define void @test_readfirstlane_half(ptr addrspace(1) %out, half %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_half:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_half:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call half @llvm.amdgcn.readfirstlane.f16(half %src)
call void asm sideeffect "; use $0", "s"(half %x)
ret void
}
; readfirstlane on a float argument, consumed by inline asm through an "s"
; constraint. Both selectors are expected to emit a single v_readfirstlane_b32
; from v2 into s4.
define void @test_readfirstlane_float(ptr addrspace(1) %out, float %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_float:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_float:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call float @llvm.amdgcn.readfirstlane.f32(float %src)
call void asm sideeffect "; use $0", "s"(float %x)
ret void
}
; readfirstlane on a bfloat argument, consumed by inline asm through an "s"
; constraint. Both selectors are expected to emit a single v_readfirstlane_b32
; from v2 into s4, with no extra masking before the asm use.
define void @test_readfirstlane_bfloat(ptr addrspace(1) %out, bfloat %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_bfloat:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_bfloat:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call bfloat @llvm.amdgcn.readfirstlane.bf16(bfloat %src)
call void asm sideeffect "; use $0", "s"(bfloat %x)
ret void
}
; readfirstlane on an i16 argument, consumed by inline asm through an "s"
; constraint. Both selectors are expected to emit one v_readfirstlane_b32 from
; v2 into s4. They differ afterwards: SelectionDAG additionally masks s4 with
; 0xffff before the asm use, GlobalISel emits no mask.
define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i16:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %src)
call void asm sideeffect "; use $0", "s"(i16 %x)
ret void
}
; readfirstlane on a <2 x half> argument, consumed by inline asm through an "s"
; constraint. Both selectors are expected to handle the whole vector with a
; single v_readfirstlane_b32 from v2 into s4.
define void @test_readfirstlane_v2f16(ptr addrspace(1) %out, <2 x half> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v2f16:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v2f16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
call void asm sideeffect "; use $0", "s"(<2 x half> %x)
ret void
}
; readfirstlane on a <2 x float> argument, consumed by inline asm through an
; "s" constraint. The expected output uses two v_readfirstlane_b32
; instructions (v2 -> s4, v3 -> s5) and passes s[4:5] to the asm. The selectors
; differ only in the order in which the two reads are emitted.
define void @test_readfirstlane_v2f32(ptr addrspace(1) %out, <2 x float> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v2f32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[4:5]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v2f32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:5]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <2 x float> @llvm.amdgcn.readfirstlane.v2f32(<2 x float> %src)
call void asm sideeffect "; use $0", "s"(<2 x float> %x)
ret void
}
; readfirstlane on a <7 x i32> argument (an odd element count), consumed by
; inline asm through an "s" constraint. The expected output uses seven
; v_readfirstlane_b32 instructions (v2..v8 -> s4..s10) and passes s[4:10] to
; the asm. SelectionDAG emits the reads from the highest element down,
; GlobalISel from the lowest element up.
define void @test_readfirstlane_v7i32(ptr addrspace(1) %out, <7 x i32> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v7i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[4:10]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v7i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:10]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <7 x i32> @llvm.amdgcn.readfirstlane.v7i32(<7 x i32> %src)
call void asm sideeffect "; use $0", "s"(<7 x i32> %x)
ret void
}
; readfirstlane on an <8 x i16> argument, consumed by inline asm through an "s"
; constraint. The expected output uses four v_readfirstlane_b32 instructions
; (v2..v5 -> s4..s7) for the eight 16-bit elements and passes s[4:7] to the
; asm. SelectionDAG emits the reads from the highest register down, GlobalISel
; from the lowest register up.
define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v8i16:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[4:7]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v8i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:7]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
%x = call <8 x i16> @llvm.amdgcn.readfirstlane.v8i16(<8 x i16> %src)
call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
ret void
}