; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; Plain i1 argument with no extension attribute, stored as-is through an undef
; addrspace(1) pointer. The expected code masks the incoming register with 1
; before issuing a byte-sized store.
define void @void_func_i1(i1 %arg0) #0 {
; CIGFX89-LABEL: void_func_i1:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i1 %arg0, ptr addrspace(1) undef
ret void
}
; i1 argument marked zeroext: it is zero-extended to i32, 12 is added, and the
; sum is stored. The expected code contains no masking instruction and uses an
; OR with 12 instead of an add (12 has bit 0 clear, so for a 0/1 input the two
; operations give the same result).
define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CIGFX89-LABEL: void_func_i1_zeroext:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_or_b32_e32 v0, 12, v0
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_or_b32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = zext i1 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, ptr addrspace(1) undef
ret void
}
; i1 argument marked signext: it is sign-extended to i32, 12 is added, and the
; sum is stored. The expected code is a single 32-bit add of 12 to v0 with no
; separate extension instruction; only the add mnemonic differs per target.
define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CI-LABEL: void_func_i1_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i1_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i1_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = sext i1 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, ptr addrspace(1) undef
ret void
}
; i1 argument used directly as a branch condition: when %arg is true control
; goes straight to %bb2 (return); when false, %bb1 performs a volatile store of
; 0 first. The expected code masks v0 with 1, compares it against 1, and inverts
; that result to form the exec mask under which the %bb1 store runs.
define void @i1_arg_i1_use(i1 %arg) #0 {
; CIGFX89-LABEL: i1_arg_i1_use:
; CIGFX89: ; %bb.0: ; %bb
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CIGFX89-NEXT: s_xor_b64 s[6:7], vcc, -1
; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
; CIGFX89-NEXT: s_cbranch_execz .LBB3_2
; CIGFX89-NEXT: ; %bb.1: ; %bb1
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: v_mov_b32_e32 v0, 0
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: .LBB3_2: ; %bb2
; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5]
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_arg_i1_use:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: s_xor_b32 s1, vcc_lo, -1
; GFX11-NEXT: s_and_saveexec_b32 s0, s1
; GFX11-NEXT: s_cbranch_execz .LBB3_2
; GFX11-NEXT: ; %bb.1: ; %bb1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB3_2: ; %bb2
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
bb:
br i1 %arg, label %bb2, label %bb1
bb1:
store volatile i32 0, ptr addrspace(1) undef
br label %bb2
bb2:
ret void
}
; Plain i8 argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a byte-sized store of v0 with no masking or extension.
define void @void_func_i8(i8 %arg0) #0 {
; CIGFX89-LABEL: void_func_i8:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i8 %arg0, ptr addrspace(1) undef
ret void
}
; i8 argument marked zeroext: it is zero-extended to i32, 12 is added, and the
; sum is stored. The expected code adds 12 to v0 directly, with no masking
; instruction in the callee.
define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CI-LABEL: void_func_i8_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = zext i8 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, ptr addrspace(1) undef
ret void
}
; i8 argument marked signext: it is sign-extended to i32, 12 is added, and the
; sum is stored. The expected code adds 12 to v0 directly, with no separate
; sign-extension instruction in the callee.
define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CI-LABEL: void_func_i8_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = sext i8 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, ptr addrspace(1) undef
ret void
}
; Plain i16 argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a 16-bit store of v0 with no masking or extension.
define void @void_func_i16(i16 %arg0) #0 {
; CIGFX89-LABEL: void_func_i16:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i16 %arg0, ptr addrspace(1) undef
ret void
}
; i16 argument marked zeroext: it is zero-extended to i32, 12 is added, and the
; sum is stored. The expected code adds 12 to v0 directly, with no masking
; instruction in the callee.
define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CI-LABEL: void_func_i16_zeroext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_zeroext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = zext i16 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, ptr addrspace(1) undef
ret void
}
; i16 argument marked signext: it is sign-extended to i32, 12 is added, and the
; sum is stored. The expected code adds 12 to v0 directly, with no separate
; sign-extension instruction in the callee.
define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CI-LABEL: void_func_i16_signext:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_signext:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, 12, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_signext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ext = sext i16 %arg0 to i32
%add = add i32 %ext, 12
store i32 %add, ptr addrspace(1) undef
ret void
}
; i32 argument stored as-is through an undef addrspace(1) pointer. The expected
; code is a single 32-bit store of v0.
define void @void_func_i32(i32 %arg0) #0 {
; CIGFX89-LABEL: void_func_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) undef
ret void
}
; i64 argument stored as-is through an undef addrspace(1) pointer. The expected
; code is a single 64-bit store of the register pair v[0:1].
define void @void_func_i64(i64 %arg0) #0 {
; CIGFX89-LABEL: void_func_i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i64 %arg0, ptr addrspace(1) undef
ret void
}
; half argument stored as-is through an undef addrspace(1) pointer. For the
; hawaii run the expected code first converts v0 with v_cvt_f16_f32 and then
; does the 16-bit store; for the other runs v0 is stored directly.
define void @void_func_f16(half %arg0) #0 {
; CI-LABEL: void_func_f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store half %arg0, ptr addrspace(1) undef
ret void
}
; float argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a single 32-bit store of v0.
define void @void_func_f32(float %arg0) #0 {
; CIGFX89-LABEL: void_func_f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store float %arg0, ptr addrspace(1) undef
ret void
}
; double argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a single 64-bit store of the register pair v[0:1].
define void @void_func_f64(double %arg0) #0 {
; CIGFX89-LABEL: void_func_f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store double %arg0, ptr addrspace(1) undef
ret void
}
; <2 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a single 64-bit store of v[0:1].
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a single three-dword (96-bit) store of v[0:2].
define void @void_func_v3i32(<3 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a single four-dword (128-bit) store of v[0:3].
define void @void_func_v4i32(<4 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <5 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code splits the store in two: a single-dword store of v4 followed by
; a four-dword store of v[0:3].
define void @void_func_v5i32(<5 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v4, off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is two four-dword stores: v[4:7] first, then v[0:3].
define void @void_func_v8i32(<8 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <16 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is four four-dword stores covering v[0:15], issued from the
; highest register group down to the lowest.
define void @void_func_v16i32(<16 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <32 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code loads v31 from memory addressed by s32 (presumably the incoming
; stack slot for the last element) and waits for that load before storing
; v[28:31]; the remaining groups are stored straight from v0..v27.
define void @void_func_v32i32(<32 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v32i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <32 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; 1 over register limit
; <33 x i32> argument stored as-is through an undef addrspace(1) pointer. The
; expected code loads two dwords from memory addressed by s32 (offsets 0 and 4):
; one fills v31 before the v[28:31] store, the other is the 33rd element and is
; written with its own single-dword store at the end.
define void @void_func_v33i32(<33 x i32> %arg0) #0 {
; CI-LABEL: void_func_v33i32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(5)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(5)
; CI-NEXT: buffer_store_dword v16, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v33i32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(5)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(5)
; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v33i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(5)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(5)
; GFX9-NEXT: buffer_store_dword v16, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v33i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x5
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b32 v32, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <33 x i32> %arg0, ptr addrspace(1) undef
ret void
}
; <2 x i64> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is a single four-dword (128-bit) store of v[0:3].
define void @void_func_v2i64(<2 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x i64> argument stored as-is through an undef addrspace(1) pointer. The
; expected code splits the store in two: a two-dword store of v[4:5] followed by
; a four-dword store of v[0:3].
define void @void_func_v3i64(<3 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x i64> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is two four-dword stores: v[4:7] first, then v[0:3].
define void @void_func_v4i64(<4 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <5 x i64> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is three stores: four dwords from v[4:7], four dwords from
; v[0:3], and finally two dwords from v[8:9].
define void @void_func_v5i64(<5 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b64 v[8:9], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x i64> argument stored as-is through an undef addrspace(1) pointer. The
; expected code is four four-dword stores covering v[0:15], issued from the
; highest register group down to the lowest.
define void @void_func_v8i64(<8 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; <16 x i64> argument (32 dwords) stored as-is through an undef addrspace(1)
; pointer. The expected code loads v31 from memory addressed by s32 (presumably
; the incoming stack slot for the last dword) and waits for that load before
; storing v[28:31]; the remaining groups are stored straight from v0..v27.
define void @void_func_v16i64(<16 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i64> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <2 x i8> argument and stores it to a null addrspace(1) pointer.
; The expected output shows the two elements arriving in v0 and v1 and being
; packed (v1 shifted left by 8, OR-ed with the low byte of v0) into a single
; 16-bit store.
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
; CI-LABEL: void_func_v2i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: s_mov_b32 s4, 0
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_mov_b32 s5, s4
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: s_mov_b32 s4, 0
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_mov_b32 s5, s4
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_mov_b32 s1, s0
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes a <2 x i16> argument and stores it to an undef addrspace(1) pointer.
; The hawaii output packs v0 and v1 into one dword before the store; the
; fiji, gfx900 and gfx1100 outputs store v0 as a dword with no packing code.
define void @void_func_v2i16(<2 x i16> %arg0) #0 {
; CI-LABEL: void_func_v2i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2i16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <3 x i8> argument and stores it to a null addrspace(1) pointer.
; The expected output splits the store in two. v2 is written as a byte while
; the low dword of the buffer base is 2, then that dword is reset to 0 and the
; packed v0/v1 pair is written as a 16-bit value.
define void @void_func_v3i8(<3 x i8> %arg0) #0 {
; CI-LABEL: void_func_v3i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: s_mov_b32 s5, 0
; CI-NEXT: s_mov_b32 s4, 2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: buffer_store_byte v2, off, s[4:7], 0
; CI-NEXT: s_mov_b32 s4, s5
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v3i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: s_mov_b32 s5, 0
; GFX89-NEXT: s_mov_b32 s4, 2
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
; GFX89-NEXT: s_mov_b32 s4, s5
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_mov_b32 s0, 2
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0
; GFX11-NEXT: s_mov_b32 s0, s1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes a <4 x i8> argument and stores it to a null addrspace(1) pointer.
; The expected output shows the four elements arriving in v0-v3 and being
; packed into a single dword that is written with one 32-bit store.
define void @void_func_v4i8(<4 x i8> %arg0) #0 {
; CI-LABEL: void_func_v4i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: v_or_b32_e32 v2, v3, v2
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: s_mov_b32 s4, 0
; CI-NEXT: v_or_b32_e32 v0, v0, v2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_mov_b32 s5, s4
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v4i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s4, 0
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_mov_b32 s5, s4
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
; GFX11-NEXT: s_mov_b32 s1, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes a <5 x i8> argument and stores it to a null addrspace(1) pointer.
; The expected output splits the store in two. v4 is written as a byte while
; the low dword of the buffer base is 4, then that dword is reset to 0 and the
; dword packed from v0-v3 is written with a 32-bit store.
define void @void_func_v5i8(<5 x i8> %arg0) #0 {
; CI-LABEL: void_func_v5i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: v_or_b32_e32 v2, v3, v2
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: s_mov_b32 s5, 0
; CI-NEXT: s_mov_b32 s4, 4
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_or_b32_e32 v0, v0, v2
; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0
; CI-NEXT: s_mov_b32 s4, s5
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v5i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s5, 0
; GFX89-NEXT: s_mov_b32 s4, 4
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
; GFX89-NEXT: s_mov_b32 s4, s5
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_mov_b32 s0, 4
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX11-NEXT: s_mov_b32 s0, s1
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes an <8 x i8> argument and stores it to a null addrspace(1) pointer.
; The expected output shows the eight elements arriving in v0-v7 and being
; packed into two dwords that are written with one 64-bit store.
define void @void_func_v8i8(<8 x i8> %arg0) #0 {
; CI-LABEL: void_func_v8i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; CI-NEXT: v_or_b32_e32 v4, v4, v5
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: v_or_b32_e32 v6, v7, v6
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; CI-NEXT: v_or_b32_e32 v2, v3, v2
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: s_mov_b32 s4, 0
; CI-NEXT: v_or_b32_e32 v4, v4, v6
; CI-NEXT: v_or_b32_e32 v3, v0, v2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_mov_b32 s5, s4
; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v8i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s4, 0
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_mov_b32 s5, s4
; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4
; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
; GFX11-NEXT: s_mov_b32 s1, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes a <16 x i8> argument and stores it to a null addrspace(1) pointer.
; The expected output shows the sixteen elements arriving in v0-v15 and being
; packed into four dwords that are written with one 128-bit store.
define void @void_func_v16i8(<16 x i8> %arg0) #0 {
; CI-LABEL: void_func_v16i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_and_b32_e32 v14, 0xff, v14
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15
; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; CI-NEXT: v_or_b32_e32 v12, v12, v13
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; CI-NEXT: v_or_b32_e32 v8, v8, v9
; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; CI-NEXT: v_or_b32_e32 v4, v4, v5
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: v_or_b32_e32 v14, v15, v14
; CI-NEXT: v_and_b32_e32 v12, 0xffff, v12
; CI-NEXT: v_or_b32_e32 v10, v11, v10
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
; CI-NEXT: v_or_b32_e32 v6, v7, v6
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; CI-NEXT: v_or_b32_e32 v2, v3, v2
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: s_mov_b32 s4, 0
; CI-NEXT: v_or_b32_e32 v12, v12, v14
; CI-NEXT: v_or_b32_e32 v11, v8, v10
; CI-NEXT: v_or_b32_e32 v10, v4, v6
; CI-NEXT: v_or_b32_e32 v9, v0, v2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_mov_b32 s5, s4
; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v16i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s4, 0
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_mov_b32 s5, s4
; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15
; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
; GFX11-NEXT: v_or_b32_e32 v13, v14, v15
; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
; GFX11-NEXT: v_or_b32_e32 v10, v10, v11
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v12
; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v13
; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v10
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX11-NEXT: v_or_b32_e32 v3, v9, v12
; GFX11-NEXT: v_or_b32_e32 v2, v8, v2
; GFX11-NEXT: v_or_b32_e32 v1, v4, v5
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: v_or_b32_e32 v0, v0, v6
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_mov_b32 s1, s0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes a <32 x i8> argument and stores it to a null addrspace(1) pointer.
; In the expected output, v0-v30 are consumed directly and one further value
; is loaded from the stack slot addressed by s32 and combined with v30 to
; build the last dword. The result is written with two 128-bit stores, the
; first while the low dword of the buffer base is 16, the second after that
; dword is reset to 0.
define void @void_func_v32i8(<32 x i8> %arg0) #0 {
; CI-LABEL: void_func_v32i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_and_b32_e32 v2, 0xff, v2
; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT: v_or_b32_e32 v2, v3, v2
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32
; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; CI-NEXT: v_and_b32_e32 v4, 0xff, v4
; CI-NEXT: v_and_b32_e32 v6, 0xff, v6
; CI-NEXT: v_or_b32_e32 v4, v4, v5
; CI-NEXT: v_lshlrev_b32_e32 v5, 24, v7
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13
; CI-NEXT: v_and_b32_e32 v12, 0xff, v12
; CI-NEXT: v_or_b32_e32 v5, v5, v6
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT: v_and_b32_e32 v0, 0xff, v0
; CI-NEXT: v_or_b32_e32 v12, v12, v13
; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9
; CI-NEXT: v_and_b32_e32 v8, 0xff, v8
; CI-NEXT: v_and_b32_e32 v13, 0xff, v14
; CI-NEXT: v_and_b32_e32 v10, 0xff, v10
; CI-NEXT: v_or_b32_e32 v7, v4, v5
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v29
; CI-NEXT: v_and_b32_e32 v4, 0xff, v28
; CI-NEXT: v_and_b32_e32 v6, 0xff, v26
; CI-NEXT: v_or_b32_e32 v8, v8, v9
; CI-NEXT: v_lshlrev_b32_e32 v9, 24, v15
; CI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11
; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; CI-NEXT: v_or_b32_e32 v1, v4, v1
; CI-NEXT: v_and_b32_e32 v4, 0xff, v30
; CI-NEXT: v_lshlrev_b32_e32 v5, 24, v27
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_or_b32_e32 v9, v9, v13
; CI-NEXT: v_or_b32_e32 v10, v11, v10
; CI-NEXT: v_and_b32_e32 v11, 0xffff, v12
; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; CI-NEXT: v_or_b32_e32 v5, v5, v6
; CI-NEXT: v_or_b32_e32 v6, v0, v2
; CI-NEXT: v_or_b32_e32 v9, v11, v9
; CI-NEXT: v_or_b32_e32 v8, v8, v10
; CI-NEXT: v_lshlrev_b32_e32 v10, 8, v25
; CI-NEXT: v_and_b32_e32 v11, 0xff, v24
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v1
; CI-NEXT: s_mov_b32 s5, 0
; CI-NEXT: s_mov_b32 s4, 16
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v3
; CI-NEXT: v_or_b32_e32 v0, v0, v4
; CI-NEXT: v_or_b32_e32 v3, v1, v0
; CI-NEXT: v_or_b32_e32 v0, v11, v10
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_and_b32_e32 v1, 0xff, v22
; CI-NEXT: v_or_b32_e32 v2, v0, v5
; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v23
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v21
; CI-NEXT: v_and_b32_e32 v4, 0xff, v20
; CI-NEXT: v_or_b32_e32 v1, v4, v1
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v1
; CI-NEXT: v_and_b32_e32 v4, 0xff, v18
; CI-NEXT: v_or_b32_e32 v1, v1, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 24, v19
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; CI-NEXT: v_or_b32_e32 v0, v0, v4
; CI-NEXT: v_lshlrev_b32_e32 v4, 8, v17
; CI-NEXT: v_and_b32_e32 v5, 0xff, v16
; CI-NEXT: v_or_b32_e32 v4, v5, v4
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; CI-NEXT: v_or_b32_e32 v0, v4, v0
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_mov_b32 s4, s5
; CI-NEXT: buffer_store_dwordx4 v[6:9], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v32i8:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13
; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15
; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: buffer_load_ubyte v14, off, s[0:3], s32
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9
; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v6, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v29
; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11
; GFX89-NEXT: v_or_b32_sdwa v7, v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v25
; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v10, v24, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v27
; GFX89-NEXT: v_lshlrev_b16_e32 v2, 8, v23
; GFX89-NEXT: v_or_b32_sdwa v11, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v21
; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v17
; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v19
; GFX89-NEXT: v_or_b32_sdwa v19, v22, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v17, v20, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v15, v18, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: s_mov_b32 s5, 0
; GFX89-NEXT: s_mov_b32 s4, 16
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v6, v10, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v5, v17, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v4, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v14
; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX89-NEXT: s_mov_b32 s4, s5
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_u8 v31, off, s32
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3
; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9
; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11
; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX11-NEXT: v_lshlrev_b16 v17, 8, v17
; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16
; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-NEXT: v_or_b32_e32 v1, v2, v3
; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13
; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15
; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-NEXT: v_lshlrev_b16 v29, 8, v29
; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28
; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30
; GFX11-NEXT: v_lshlrev_b16 v25, 8, v25
; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v24
; GFX11-NEXT: v_lshlrev_b16 v27, 8, v27
; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26
; GFX11-NEXT: v_lshlrev_b16 v21, 8, v21
; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20
; GFX11-NEXT: v_lshlrev_b16 v23, 8, v23
; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22
; GFX11-NEXT: v_lshlrev_b16 v19, 8, v19
; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18
; GFX11-NEXT: v_or_b32_e32 v8, v8, v9
; GFX11-NEXT: v_or_b32_e32 v9, v10, v11
; GFX11-NEXT: v_or_b32_e32 v11, v16, v17
; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v1
; GFX11-NEXT: v_or_b32_e32 v12, v12, v13
; GFX11-NEXT: v_or_b32_e32 v13, v14, v15
; GFX11-NEXT: v_or_b32_e32 v4, v4, v5
; GFX11-NEXT: v_or_b32_e32 v5, v6, v7
; GFX11-NEXT: v_or_b32_e32 v2, v28, v29
; GFX11-NEXT: v_or_b32_e32 v3, v24, v25
; GFX11-NEXT: v_or_b32_e32 v6, v26, v27
; GFX11-NEXT: v_or_b32_e32 v7, v20, v21
; GFX11-NEXT: v_or_b32_e32 v10, v22, v23
; GFX11-NEXT: v_or_b32_e32 v14, v18, v19
; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v4
; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v5
; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v2
; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v3
; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v6
; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11
; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12
; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_or_b32_e32 v6, v4, v5
; GFX11-NEXT: v_or_b32_e32 v5, v7, v10
; GFX11-NEXT: v_or_b32_e32 v4, v11, v14
; GFX11-NEXT: v_or_b32_e32 v3, v12, v13
; GFX11-NEXT: v_or_b32_e32 v2, v8, v9
; GFX11-NEXT: v_or_b32_e32 v0, v0, v17
; GFX11-NEXT: s_mov_b32 s1, 0
; GFX11-NEXT: s_mov_b32 s0, 16
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshlrev_b16 v1, 8, v31
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v1, v30, v1
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v7, v18, v1
; GFX11-NEXT: v_or_b32_e32 v1, v15, v16
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: s_mov_b32 s0, s1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <32 x i8> %arg0, ptr addrspace(1) null
ret void
}
; Takes a <3 x i16> argument and stores it to an undef addrspace(1) pointer.
; The hawaii output packs v0 and v1 into a dword and stores v2 as a 16-bit
; value; the fiji, gfx900 and gfx1100 outputs store v0 as a dword and v1 as a
; 16-bit value with no packing code.
define void @void_func_v3i16(<3 x i16> %arg0) #0 {
; CI-LABEL: void_func_v3i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v3i16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <4 x i16> argument and stores it to an undef addrspace(1) pointer.
; The hawaii output packs v0-v3 into two dwords before a 64-bit store; the
; fiji, gfx900 and gfx1100 outputs store v[0:1] directly.
define void @void_func_v4i16(<4 x i16> %arg0) #0 {
; CI-LABEL: void_func_v4i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_or_b32_e32 v2, v2, v3
; CI-NEXT: v_or_b32_e32 v1, v0, v1
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v4i16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <5 x i16> argument and stores it to an undef addrspace(1) pointer.
; The hawaii output packs v0-v3 into two dwords for a 64-bit store and writes
; v4 as a 16-bit value; the fiji, gfx900 and gfx1100 outputs store v[0:1] as
; 64 bits and v2 as a 16-bit value with no packing code.
define void @void_func_v5i16(<5 x i16> %arg0) #0 {
; CI-LABEL: void_func_v5i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_or_b32_e32 v2, v2, v3
; CI-NEXT: v_or_b32_e32 v1, v0, v1
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v5i16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <5 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <8 x i16> argument and stores it to an undef addrspace(1) pointer.
; The CI (hawaii) checks expect v0-v7 to be packed pairwise into v[3:6]
; before a single 128-bit store; the GFX89 and GFX11 checks expect v[0:3]
; to be stored directly.
define void @void_func_v8i16(<8 x i16> %arg0) #0 {
; CI-LABEL: void_func_v8i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_or_b32_e32 v6, v6, v7
; CI-NEXT: v_or_b32_e32 v5, v4, v5
; CI-NEXT: v_or_b32_e32 v4, v2, v3
; CI-NEXT: v_or_b32_e32 v3, v0, v1
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v8i16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <16 x i16> argument and stores it to an undef addrspace(1) pointer.
; The CI (hawaii) checks expect v0-v15 to be packed pairwise into v[3:6] and
; v[11:14], followed by two 128-bit stores. The GFX89 and GFX11 checks
; expect v[4:7] and v[0:3] to be stored directly; GFX11 additionally groups
; the two stores under s_clause 0x1.
define void @void_func_v16i16(<16 x i16> %arg0) #0 {
; CI-LABEL: void_func_v16i16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT: v_or_b32_e32 v5, v4, v5
; CI-NEXT: v_or_b32_e32 v4, v2, v3
; CI-NEXT: v_or_b32_e32 v3, v0, v1
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14
; CI-NEXT: v_or_b32_e32 v14, v1, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12
; CI-NEXT: v_or_b32_e32 v13, v1, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10
; CI-NEXT: v_or_b32_e32 v12, v1, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9
; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8
; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6
; CI-NEXT: v_or_b32_e32 v11, v1, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_or_b32_e32 v6, v6, v7
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v16i16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x i16> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <2 x i24> argument, adds its two elements as i24 and stores the
; i24 sum to an undef addrspace(1) pointer. Every check prefix expects the
; same shape: add v0 and v1, shift the sum right by 16, then a byte store of
; the shifted value and a 16-bit store of the sum. Only the add mnemonic
; differs between targets, which is why each target has its own prefix here.
define void @void_func_v2i24(<2 x i24> %arg0) #0 {
; CI-LABEL: void_func_v2i24:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v2i24:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v2i24:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i24:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%elt0 = extractelement <2 x i24> %arg0, i32 0
%elt1 = extractelement <2 x i24> %arg0, i32 1
%add = add i24 %elt0, %elt1
store i24 %add, ptr addrspace(1) undef
ret void
}
; Takes a <2 x float> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect a single 64-bit store of v[0:1] with
; no intermediate VALU instructions.
define void @void_func_v2f32(<2 x float> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <3 x float> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect a single 96-bit store of v[0:2]
; (buffer_store_dwordx3 on the older targets, buffer_store_b96 on GFX11).
define void @void_func_v3f32(<3 x float> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <4 x float> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect a single 128-bit store of v[0:3].
define void @void_func_v4f32(<4 x float> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <8 x float> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect two 128-bit stores, v[4:7] first and
; then v[0:3]; GFX11 additionally groups them under s_clause 0x1.
define void @void_func_v8f32(<8 x float> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <16 x float> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect four 128-bit stores in descending
; register order, v[12:15] down to v[0:3]; GFX11 groups them under
; s_clause 0x3.
define void @void_func_v16f32(<16 x float> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16f32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x float> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <2 x double> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect a single 128-bit store of v[0:3].
define void @void_func_v2f64(<2 x double> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <3 x double> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect a 64-bit store of v[4:5] followed by a
; 128-bit store of v[0:3]; GFX11 groups them under s_clause 0x1.
define void @void_func_v3f64(<3 x double> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <4 x double> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect two 128-bit stores, v[4:7] and then
; v[0:3]; GFX11 groups them under s_clause 0x1.
define void @void_func_v4f64(<4 x double> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <8 x double> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect four 128-bit stores in descending
; register order, v[12:15] down to v[0:3]; GFX11 groups them under
; s_clause 0x3.
define void @void_func_v8f64(<8 x double> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <16 x double> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect eight 128-bit stores covering v[0:31].
; Unlike the smaller vectors, the expected output first loads v31 from
; memory addressed through s32, and a vmcnt wait is placed before the store
; of v[28:31] so that the loaded value is available.
; NOTE(review): presumably the last dword of the argument is passed in
; memory rather than in a VGPR - confirm against the calling convention.
define void @void_func_v16f64(<16 x double> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16f64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(6)
; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x double> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <2 x half> argument and stores it to an undef addrspace(1)
; pointer. The CI (hawaii) checks expect v0 and v1 to go through
; v_cvt_f16_f32 and then be packed into one dword with shift/or before a
; 32-bit store; the GFX89 and GFX11 checks expect v0 to be stored directly.
define void @void_func_v2f16(<2 x half> %arg0) #0 {
; CI-LABEL: void_func_v2f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x half> %arg0, ptr addrspace(1) undef
ret void
}
; FIXME: Different ABI if f16 is legal
; Takes a <3 x half> argument and stores it to an undef addrspace(1)
; pointer. All check prefixes expect a 16-bit store followed by a 32-bit
; store. The CI (hawaii) checks convert v0-v2 with v_cvt_f16_f32, pack
; v0/v1 into one dword and take the 16-bit value from v2; the GFX89 and
; GFX11 checks store v1 and v0 unmodified.
define void @void_func_v3f16(<3 x half> %arg0) #0 {
; CI-LABEL: void_func_v3f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v3f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x half> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <4 x half> argument and stores it to an undef addrspace(1)
; pointer. The CI (hawaii) checks expect v0-v3 to be converted with
; v_cvt_f16_f32 and packed pairwise into v[0:1] before one 64-bit store;
; the GFX89 and GFX11 checks expect v[0:1] to be stored directly.
define void @void_func_v4f16(<4 x half> %arg0) #0 {
; CI-LABEL: void_func_v4f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NEXT: v_cvt_f16_f32_e32 v4, v1
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; CI-NEXT: v_or_b32_e32 v1, v2, v1
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; CI-NEXT: v_or_b32_e32 v0, v0, v2
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v4f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x half> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <8 x half> argument and stores it to an undef addrspace(1)
; pointer. The CI (hawaii) checks expect v0-v7 to be converted with
; v_cvt_f16_f32 and packed pairwise into v[2:5] before one 128-bit store;
; the GFX89 and GFX11 checks expect v[0:3] to be stored directly.
define void @void_func_v8f16(<8 x half> %arg0) #0 {
; CI-LABEL: void_func_v8f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
; CI-NEXT: v_cvt_f16_f32_e32 v8, v5
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; CI-NEXT: v_or_b32_e32 v5, v6, v5
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_or_b32_e32 v4, v4, v6
; CI-NEXT: v_or_b32_e32 v3, v2, v3
; CI-NEXT: v_or_b32_e32 v2, v0, v1
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v8f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x half> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <16 x half> argument and stores it to an undef addrspace(1)
; pointer. The CI (hawaii) checks expect v0-v15 to be converted with
; v_cvt_f16_f32 and packed pairwise into v[2:5] and v[10:13], followed by
; two 128-bit stores. The GFX89 and GFX11 checks expect v[4:7] and v[0:3]
; to be stored directly; GFX11 groups the two stores under s_clause 0x1.
define void @void_func_v16f16(<16 x half> %arg0) #0 {
; CI-LABEL: void_func_v16f16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v7, v7
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NEXT: v_cvt_f16_f32_e32 v6, v6
; CI-NEXT: v_cvt_f16_f32_e32 v16, v5
; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NEXT: v_cvt_f16_f32_e32 v0, v0
; CI-NEXT: v_cvt_f16_f32_e32 v4, v4
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_or_b32_e32 v5, v6, v5
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16
; CI-NEXT: v_or_b32_e32 v3, v2, v3
; CI-NEXT: v_or_b32_e32 v2, v0, v1
; CI-NEXT: v_cvt_f16_f32_e32 v0, v15
; CI-NEXT: v_or_b32_e32 v4, v4, v6
; CI-NEXT: v_cvt_f16_f32_e32 v1, v14
; CI-NEXT: v_cvt_f16_f32_e32 v6, v13
; CI-NEXT: v_cvt_f16_f32_e32 v7, v12
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; CI-NEXT: v_or_b32_e32 v13, v1, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
; CI-NEXT: v_or_b32_e32 v12, v7, v0
; CI-NEXT: v_cvt_f16_f32_e32 v0, v11
; CI-NEXT: v_cvt_f16_f32_e32 v1, v10
; CI-NEXT: v_cvt_f16_f32_e32 v6, v9
; CI-NEXT: v_cvt_f16_f32_e32 v7, v8
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; CI-NEXT: v_or_b32_e32 v11, v1, v0
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6
; CI-NEXT: v_or_b32_e32 v10, v7, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v16f16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x half> %arg0, ptr addrspace(1) undef
ret void
}
; Make sure there is no alignment requirement for passed vgprs.
; Takes (i32, i64, i32) and stores each argument with a separate volatile
; store to an undef addrspace(1) pointer. All check prefixes expect the
; arguments in consecutive registers: the first i32 in v0, the i64 in
; v[1:2] (an odd-numbered start) and the second i32 in v3, with a wait
; after every store.
define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
; CIGFX89-LABEL: void_func_i32_i64_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_dword v3, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_i64_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile i32 %arg0, ptr addrspace(1) undef
store volatile i64 %arg1, ptr addrspace(1) undef
store volatile i32 %arg2, ptr addrspace(1) undef
ret void
}
; Takes a single-member struct { i32 } by value and stores it to an undef
; addrspace(1) pointer. All check prefixes expect one 32-bit store of v0.
define void @void_func_struct_i32({ i32 } %arg0) #0 {
; CIGFX89-LABEL: void_func_struct_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_struct_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store { i32 } %arg0, ptr addrspace(1) undef
ret void
}
; Takes a struct { i8, i32 } by value and stores it to an undef
; addrspace(1) pointer. All check prefixes expect a 32-bit store of v1
; followed by a byte store of v0; GFX11 groups them under s_clause 0x1.
define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
; CIGFX89-LABEL: void_func_struct_i8_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_dword v1, off, s[4:7], 0
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store { i8, i32 } %arg0, ptr addrspace(1) undef
ret void
}
; Takes a struct { i8, i32 } through a byval addrspace(5) pointer, loads it
; and stores the loaded value to an undef addrspace(1) pointer. All check
; prefixes expect the i32 member to be loaded from s32 offset:4 and the i8
; member as an unsigned byte from s32 with no offset, then a 32-bit store
; and a byte store, each preceded by a vmcnt wait for its load.
define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4
; CIGFX89-NEXT: buffer_load_ubyte v1, off, s[0:3], s32
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_byval_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4
; GFX11-NEXT: scratch_load_u8 v1, off, s32
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
store { i8, i32 } %arg0.load, ptr addrspace(1) undef
ret void
}
; Takes two byval { i8, i32 } structs (addrspace(5) pointers) followed by
; an i32. Both structs are loaded and stored to an undef addrspace(1)
; pointer, and the i32 is stored to an undef addrspace(3) pointer; every
; access is volatile. All check prefixes expect the struct members to be
; loaded from s32 at offsets 0, 4, 8 and 12, each load and store followed by
; its own wait, and the i32 to be written from v0 with a DS write. The CI
; (hawaii) and VI (fiji) checks also expect "s_mov_b32 m0, -1" before the
; DS write, which is absent from the GFX9 and GFX11 checks; that is why the
; three older targets cannot share one prefix here.
define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
; CI-LABEL: void_func_byval_struct_i8_i32_x2:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: buffer_store_dword v2, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dword v4, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v3, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_byval_struct_i8_i32_x2:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v3, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: ds_write_b32 v0, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_byval_struct_i8_i32_x2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: buffer_store_dword v2, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v4, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v3, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_byval_struct_i8_i32_x2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0
%arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1
store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef
store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef
store volatile i32 %arg2, ptr addrspace(3) undef
ret void
}
; Takes a byval i32 and a byval i64 (both addrspace(5) pointers), loads
; each and stores the values to an undef addrspace(1) pointer. All check
; prefixes expect the i32 to be loaded from s32 with no offset and the i64
; from s32 offset:8. The CIGFX89 checks use two dword loads at offsets 8 and
; 12 for the i64; GFX11 uses a single 64-bit scratch load.
define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: buffer_load_dword v2, off, s[0:3], s32
; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8
; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: s_waitcnt vmcnt(2)
; CIGFX89-NEXT: buffer_store_dword v2, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(1)
; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_byval_i32_byval_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v2, off, s32
; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%arg0.load = load i32, ptr addrspace(5) %arg0
%arg1.load = load i64, ptr addrspace(5) %arg1
store i32 %arg0.load, ptr addrspace(1) undef
store i64 %arg1.load, ptr addrspace(1) undef
ret void
}
; A <32 x i32> argument followed by an i32 and an i64; each argument is written
; with a volatile store to an undef addrspace(1) pointer.
; In every expected sequence v31 is loaded from the stack at s32 before
; v[28:31] is stored, and three more dwords are loaded from s32 offsets 4, 8
; and 12 to feed the final 32-bit and 64-bit stores.
define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
; CI-LABEL: void_func_v32i32_i32_i64:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dword v20, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_i32_i64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_i32_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_i32_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile i32 %arg1, ptr addrspace(1) undef
store volatile i64 %arg2, ptr addrspace(1) undef
ret void
}
; FIXME: Different ext load types on CI vs. VI
; A <32 x i32> argument followed by i1, i8, i16, half and bfloat arguments; each
; argument is written with a volatile store to an undef addrspace(1) pointer.
; NOTE(review): the function name lists only i1/i8/i16/bf16, but the signature
; also carries a half argument (%arg4).
; The expected code loads the trailing arguments from s32 offsets 4 through 20.
; The hawaii checks use dword loads (plus one ubyte load), then apply
; v_cvt_f16_f32 to one value and v_mul_f32 by 1.0 followed by a 16-bit right
; shift to another before the 16-bit stores. The other targets use ubyte/ushort
; loads (u8/u16 scratch loads on gfx1100) and store the loaded values directly.
define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 {
; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v16, v16
; CI-NEXT: v_mul_f32_e32 v20, 1.0, v20
; CI-NEXT: v_and_b32_e32 v0, 1, v17
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v20
; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v19, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v16, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v16, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_ushort v17, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_and_b32_e32 v0, 1, v20
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_short v17, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_short v18, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_short v19, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_i1_i8_i16_bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ushort v16, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_ushort v17, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_ushort v18, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_ushort v19, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v20
; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_short v17, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_short v18, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_short v19, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x5
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12
; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16
; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: v_and_b32_e32 v16, 1, v32
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile i1 %arg1, ptr addrspace(1) undef
store volatile i8 %arg2, ptr addrspace(1) undef
store volatile i16 %arg3, ptr addrspace(1) undef
store volatile half %arg4, ptr addrspace(1) undef
store volatile bfloat %arg5, ptr addrspace(1) undef
ret void
}
; A <32 x i32> argument followed by <2 x i32> and <2 x float> arguments; each
; argument is written with a volatile store to an undef addrspace(1) pointer.
; The expected code loads v31 from s32 and four more dwords from s32 offsets
; 4, 8, 12 and 16, then emits two 64-bit stores from the resulting register
; pairs after the eight 128-bit stores.
define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v2i32_v2f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v2i32_v2f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v2i32_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[18:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i32_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x4
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <2 x i32> %arg1, ptr addrspace(1) undef
store volatile <2 x float> %arg2, ptr addrspace(1) undef
ret void
}
; A <32 x i32> argument followed by <2 x i16>, <2 x half>, <2 x bfloat> and
; <4 x bfloat> arguments; each argument is written with a volatile store to an
; undef addrspace(1) pointer.
; The hawaii checks load ten dwords from s32 offsets 4 through 40 and finish
; with ten 16-bit stores, applying v_cvt_f16_f32 to two of the values and
; v_mul_f32 by 1.0 plus a 16-bit right shift to six others. The other targets
; load five dwords from s32 offsets 4 through 20 and finish with three 32-bit
; stores and one 64-bit store.
define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 {
; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v15, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v8, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f16_f32_e32 v14, v14
; CI-NEXT: v_cvt_f16_f32_e32 v13, v13
; CI-NEXT: v_mul_f32_e32 v9, 1.0, v20
; CI-NEXT: v_mul_f32_e32 v10, 1.0, v16
; CI-NEXT: v_mul_f32_e32 v11, 1.0, v17
; CI-NEXT: v_mul_f32_e32 v16, 1.0, v18
; CI-NEXT: v_mul_f32_e32 v17, 1.0, v19
; CI-NEXT: v_mul_f32_e32 v12, 1.0, v12
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v9
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v10
; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v11
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v16
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v17
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v12
; CI-NEXT: buffer_store_short v14, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v13, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v5, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v4, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v3, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v2, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v19, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:20
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:12
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v18, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v19, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[16:17], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x5
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b32 v36, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <2 x i16> %arg1, ptr addrspace(1) undef
store volatile <2 x half> %arg2, ptr addrspace(1) undef
store volatile <2 x bfloat> %arg3, ptr addrspace(1) undef
store volatile <4 x bfloat> %arg4, ptr addrspace(1) undef
ret void
}
define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v2i64_v2f64:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v2i64_v2f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v2i64_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <2 x i64> %arg1, ptr addrspace(1) undef
store volatile <2 x double> %arg2, ptr addrspace(1) undef
ret void
}
; Stack-passed vector arguments, 4-element case: a <32 x i32> followed by a
; <4 x i32> and a <4 x float>, each written out with a volatile store.
; As the checks below show, v0-v30 are used as they arrive, the last dword of
; %arg0 is reloaded into v31 from the stack at s32+0, and the dwords feeding
; the two later stores are reloaded from s32+4..s32+16 and s32+20..s32+32.
; The <32 x i32> value is expected as eight 128-bit stores starting with
; v[28:31], and every store is expected to be followed by a wait.
; For hawaii/fiji/gfx900 the reloads reuse v12-v19 once those registers have
; been stored; the gfx1100 checks expect all nine scratch loads up front into
; v31-v39.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v4i32_v4f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v4i32_v4f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <4 x i32> %arg1, ptr addrspace(1) undef
store volatile <4 x float> %arg2, ptr addrspace(1) undef
ret void
}
; Stack-passed vector arguments, 8-element case: a <32 x i32> followed by an
; <8 x i32> and an <8 x float>, each written out with a volatile store.
; As the checks below show, v0-v30 are used as they arrive, the last dword of
; %arg0 is reloaded into v31 from the stack at s32+0, and sixteen more dwords
; are reloaded from s32+4..s32+64 to feed the two later stores.
; Each 8-element value is expected as two 128-bit stores, with the half taken
; from the higher stack offsets written first (e.g. the dwords from
; s32+20..s32+32 are stored before those from s32+4..s32+16), and every store
; is expected to be followed by a wait.
; For hawaii/fiji/gfx900 the reloads reuse v4-v19 once those registers have
; been stored; the gfx1100 checks expect all 17 scratch loads up front into
; v31-v39 and v48-v55.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v8i32_v8f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v8i32_v8f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x10
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(11)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <8 x i32> %arg1, ptr addrspace(1) undef
store volatile <8 x float> %arg2, ptr addrspace(1) undef
ret void
}
define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
; CI-LABEL: void_func_v32i32_v16i32_v16f32:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v16i32_v16f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:64
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:60
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:52
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:48
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:28
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:20
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96
; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92
; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:124
; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:120
; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:116
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:16
; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:80
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:76
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:68
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80
; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72
; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96
; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92
; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88
; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112
; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108
; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104
; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128
; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124
; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120
; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16
; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12
; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32
; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24
; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48
; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44
; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40
; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64
; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60
; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56
; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52
; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36
; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:20
; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116
; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100
; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(15)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <16 x i32> %arg1, ptr addrspace(1) undef
store volatile <16 x float> %arg2, ptr addrspace(1) undef
ret void
}
; Make sure register v3 isn't wasted because of <3 x T> vector types being
; promoted to <4 x T>.
;
; The function takes a <3 x float> followed by an i32, extracts the three
; vector elements and stores each of them, then the i32, with volatile stores
; to an undef addrspace(3) pointer. The expected output writes v0, v1, v2 and
; v3 in that order, i.e. %arg1 is expected directly after the three vector
; elements, in v3. The hawaii and fiji outputs additionally set m0 to -1
; before the DS writes; the gfx900 and gfx1100 outputs do not.
define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
; CI-LABEL: void_func_v3f32_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v1
; CI-NEXT: ds_write_b32 v0, v2
; CI-NEXT: ds_write_b32 v0, v3
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v3f32_wasted_reg:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_write_b32 v0, v0
; VI-NEXT: ds_write_b32 v0, v1
; VI-NEXT: ds_write_b32 v0, v2
; VI-NEXT: ds_write_b32 v0, v3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v3f32_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: ds_write_b32 v0, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-NEXT: ds_store_b32 v0, v3
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Split the vector so every element gets its own volatile store.
%arg0.0 = extractelement <3 x float> %arg0, i32 0
%arg0.1 = extractelement <3 x float> %arg0, i32 1
%arg0.2 = extractelement <3 x float> %arg0, i32 2
store volatile float %arg0.0, ptr addrspace(3) undef
store volatile float %arg0.1, ptr addrspace(3) undef
store volatile float %arg0.2, ptr addrspace(3) undef
  ; The trailing scalar argument; the expected output stores it from v3.
store volatile i32 %arg1, ptr addrspace(3) undef
ret void
}
; Integer variant of the <3 x T> "wasted register" test: a <3 x i32> argument
; followed by an i32. The three vector elements and then the i32 are written
; with volatile stores to an undef addrspace(3) pointer. The expected output
; writes v0, v1, v2 and v3 in that order, i.e. %arg1 is expected in v3 with no
; register skipped after the 3-element vector.
define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
; CI-LABEL: void_func_v3i32_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v1
; CI-NEXT: ds_write_b32 v0, v2
; CI-NEXT: ds_write_b32 v0, v3
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v3i32_wasted_reg:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_write_b32 v0, v0
; VI-NEXT: ds_write_b32 v0, v1
; VI-NEXT: ds_write_b32 v0, v2
; VI-NEXT: ds_write_b32 v0, v3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v3i32_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: ds_write_b32 v0, v1
; GFX9-NEXT: ds_write_b32 v0, v2
; GFX9-NEXT: ds_write_b32 v0, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-NEXT: ds_store_b32 v0, v3
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Split the vector so every element gets its own volatile store.
%arg0.0 = extractelement <3 x i32> %arg0, i32 0
%arg0.1 = extractelement <3 x i32> %arg0, i32 1
%arg0.2 = extractelement <3 x i32> %arg0, i32 2
store volatile i32 %arg0.0, ptr addrspace(3) undef
store volatile i32 %arg0.1, ptr addrspace(3) undef
store volatile i32 %arg0.2, ptr addrspace(3) undef
  ; The trailing scalar argument; the expected output stores it from v3.
store volatile i32 %arg1, ptr addrspace(3) undef
ret void
}
; Check there is no crash when a <16 x i8> argument is written with a single
; volatile vector store to an undef addrspace(1) pointer.
;
; The expected output splits the store into sixteen individual byte stores,
; issued from v15 down to v0, each followed by its own wait instruction.
define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
; CIGFX89-LABEL: void_func_volatile_v16i8:
; CIGFX89: ; %bb.0:
; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT: s_mov_b32 s7, 0xf000
; CIGFX89-NEXT: s_mov_b32 s6, -1
; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v5, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_volatile_v16i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v14, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <16 x i8> %arg0, ptr addrspace(1) undef
ret void
}
; Check there is no crash when a <16 x i8> argument follows a <32 x i32>
; argument. Both arguments are written with volatile vector stores to an undef
; addrspace(1) pointer.
;
; What the expected output shows:
;  - v31 is loaded from the stack at s32 with no offset before the first
;    128-bit store of v[28:31]; the other seven 128-bit stores use v0-v27
;    without loading them first.
;  - Sixteen further values are loaded from the stack at s32 offsets 4..64
;    (one every 4 bytes) and each is written with its own byte store.
;  - The hawaii output loads those sixteen values as dwords, the fiji and
;    gfx900 outputs use unsigned byte loads, and the gfx1100 output uses
;    scratch_load_u8.
define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
; CI-LABEL: void_func_v32i32_v16i8:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64
; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:48
; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52
; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56
; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:36
; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:40
; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:44
; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:28
; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32
; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:20
; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:24
; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:16
; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12
; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8
; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4
; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60
; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v14, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v8, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v10, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v9, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v11, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v5, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: buffer_store_byte v6, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v32i32_v16i8:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:64
; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:48
; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:52
; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:56
; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:36
; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:40
; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:44
; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:28
; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ubyte v8, off, s[0:3], s32 offset:32
; VI-NEXT: buffer_load_ubyte v9, off, s[0:3], s32 offset:20
; VI-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 offset:24
; VI-NEXT: buffer_load_ubyte v11, off, s[0:3], s32 offset:16
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ubyte v4, off, s[0:3], s32 offset:12
; VI-NEXT: buffer_load_ubyte v5, off, s[0:3], s32 offset:8
; VI-NEXT: buffer_load_ubyte v6, off, s[0:3], s32 offset:4
; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:60
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v8, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v10, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v9, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v11, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v4, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v5, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_byte v6, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v32i32_v16i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:64
; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:48
; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:52
; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:56
; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:60
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:36
; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:40
; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:44
; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:28
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v8, off, s[0:3], s32 offset:32
; GFX9-NEXT: buffer_load_ubyte v9, off, s[0:3], s32 offset:20
; GFX9-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 offset:24
; GFX9-NEXT: buffer_load_ubyte v11, off, s[0:3], s32 offset:16
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v4, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_ubyte v5, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_ubyte v6, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v8, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v10, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v9, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v11, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v4, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v5, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_byte v6, off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_v16i8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_clause 0x10
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64
; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60
; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56
; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52
; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48
; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44
; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40
; GFX11-NEXT: scratch_load_u8 v39, off, s32 offset:36
; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32
; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28
; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24
; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20
; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16
; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12
; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8
; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt vmcnt(16)
; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(15)
; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(14)
; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(13)
; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(12)
; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(11)
; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(10)
; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(9)
; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store volatile <32 x i32> %arg0, ptr addrspace(1) undef
store volatile <16 x i8> %arg1, ptr addrspace(1) undef
ret void
}
; Scalar bfloat argument, stored (non-volatile) to an undef addrspace(1)
; pointer.
;
; The hawaii output multiplies v0 by 1.0 and shifts it right by 16 before the
; 16-bit store; the other targets store v0 as a 16-bit value directly.
; NOTE(review): this suggests hawaii receives the value in the high half of a
; 32-bit register -- confirm against the calling-convention lowering.
define void @void_func_bf16(bfloat %arg0) #0 {
; CI-LABEL: void_func_bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_short v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_bf16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store bfloat %arg0, ptr addrspace(1) undef
ret void
}
; <2 x bfloat> argument, stored (non-volatile) to an undef addrspace(1)
; pointer.
;
; The hawaii output takes the two elements from v0 and v1, multiplies each by
; 1.0 and combines them into v0 with v_alignbit_b32 before a single dword
; store. The other targets store v0 as one dword with no preparation.
define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
; CI-LABEL: void_func_v2bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2bf16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <2 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; <3 x bfloat> argument, stored (non-volatile) to an undef addrspace(1)
; pointer.
;
; Every target's expected output emits two stores: a 16-bit store of v1 and a
; 32-bit store of v0. The hawaii output first builds those two registers from
; v0, v1 and v2 (multiply by 1.0, shift right by 16, v_alignbit_b32); the
; other targets store v1 and v0 as received.
define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
; CI-LABEL: void_func_v3bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v2
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_short v1, off, s[4:7], 0
; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v3bf16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0
; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <3 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; <4 x bfloat> argument, stored (non-volatile) to an undef addrspace(1)
; pointer.
;
; The hawaii output reads the four elements from v0-v3, combines them pairwise
; with v_alignbit_b32 into v[1:2] and stores that pair with one 64-bit store.
; The other targets store v[0:1] with one 64-bit store and no preparation.
define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
; CI-LABEL: void_func_v4bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16
; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v4bf16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <4 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; <8 x bfloat> argument, stored (non-volatile) to an undef addrspace(1)
; pointer.
;
; The hawaii output reads the eight elements from v0-v7, combines them
; pairwise with v_alignbit_b32 into v[3:6] and stores that range with one
; 128-bit store. The other targets store v[0:3] with one 128-bit store and no
; preparation.
define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
; CI-LABEL: void_func_v8bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v8bf16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <8 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Takes a <16 x bfloat> argument by value and stores it, unmodified, through an
; undef addrspace(1) pointer.
; Expected output, per the autogenerated checks below:
;   * hawaii: v0-v15 each go through a multiply by 1.0; odd-numbered inputs are
;     shifted right by 16 and merged with their even neighbour via
;     v_alignbit_b32. Inputs v0-v7 end up packed in v3-v6 and inputs v8-v15 in
;     v11-v14 (v0/v1 are reused as scratch for the upper half). Two dwordx4
;     stores follow: v[11:14] first, then v[3:6].
;   * fiji / gfx900: two dwordx4 stores, v[4:7] then v[0:3], with no packing.
;   * gfx1100: the same two stores as buffer_store_b128, preceded by
;     s_clause 0x1.
; NOTE(review): both stores in every variant are expected with no offset
; operand; presumably this follows from the destination pointer being undef --
; confirm before relying on it.
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py when the expected codegen changes.
define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
; CI-LABEL: void_func_v16bf16:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5
; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16
; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16
; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v15
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v14
; CI-NEXT: v_alignbit_b32 v14, v0, v1, 16
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v13
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v12
; CI-NEXT: v_alignbit_b32 v13, v0, v1, 16
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v11
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v10
; CI-NEXT: v_alignbit_b32 v12, v0, v1, 16
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v9
; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v8
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6
; CI-NEXT: v_alignbit_b32 v11, v0, v1, 16
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16
; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0
; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v16bf16:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16bf16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
store <16 x bfloat> %arg0, ptr addrspace(1) undef
ret void
}
; Attribute group #0, referenced by the function definitions above: marks them
; nounwind.
attributes #0 = { nounwind }