; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,CI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX8 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; Plain i1 return. The value is loaded as i1 from an undef addrspace(1)
; pointer; the checks expect a single unsigned byte load into v0 that is
; returned without any further masking or extension.
define i1 @i1_func_void() #0 {
; GFX789-LABEL: i1_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i1, ptr addrspace(1) undef
ret i1 %val
}
; FIXME: Missing and? The zeroext i1 result is returned straight from the
; unsigned byte load; the checks below contain no instruction that masks v0
; down to bit 0 before the return.
; zeroext i1 return of a value loaded from an undef addrspace(1) pointer.
define zeroext i1 @i1_zeroext_func_void() #0 {
; GFX789-LABEL: i1_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i1, ptr addrspace(1) undef
ret i1 %val
}
; signext i1 return. After the unsigned byte load the checks expect a
; v_bfe_i32 with offset 0 and width 1, i.e. bit 0 of v0 is sign-extended
; across the register before returning.
define signext i1 @i1_signext_func_void() #0 {
; GFX789-LABEL: i1_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i1, ptr addrspace(1) undef
ret i1 %val
}
; Plain i8 return of a value loaded from an undef addrspace(1) pointer.
; The checks expect one unsigned byte load into v0 and nothing else.
define i8 @i8_func_void() #0 {
; GFX789-LABEL: i8_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i8, ptr addrspace(1) undef
ret i8 %val
}
; zeroext i8 return. The checks expect the unsigned byte load alone to
; provide the result in v0; no separate extension instruction follows it.
define zeroext i8 @i8_zeroext_func_void() #0 {
; GFX789-LABEL: i8_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i8, ptr addrspace(1) undef
ret i8 %val
}
; signext i8 return. The checks expect the signed byte load form
; (buffer_load_sbyte, or buffer_load_i8 for the gfx1100 run) into v0, with
; no separate extension instruction afterwards.
define signext i8 @i8_signext_func_void() #0 {
; GFX789-LABEL: i8_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_sbyte v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i8, ptr addrspace(1) undef
ret i8 %val
}
; Plain i16 return of a value loaded from an undef addrspace(1) pointer.
; The checks expect one unsigned 16-bit load into v0 and nothing else.
define i16 @i16_func_void() #0 {
; GFX789-LABEL: i16_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i16, ptr addrspace(1) undef
ret i16 %val
}
; zeroext i16 return. The checks expect the unsigned 16-bit load alone to
; provide the result in v0; no separate extension instruction follows it.
define zeroext i16 @i16_zeroext_func_void() #0 {
; GFX789-LABEL: i16_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i16, ptr addrspace(1) undef
ret i16 %val
}
; signext i16 return. The checks expect the signed 16-bit load form
; (buffer_load_sshort, or buffer_load_i16 for the gfx1100 run) into v0, with
; no separate extension instruction afterwards.
define signext i16 @i16_signext_func_void() #0 {
; GFX789-LABEL: i16_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_sshort v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i16, ptr addrspace(1) undef
ret i16 %val
}
; i32 return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 32-bit load into v0.
define i32 @i32_func_void() #0 {
; GFX789-LABEL: i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i32, ptr addrspace(1) undef
ret i32 %val
}
; i48 return of an align-8 load from an undef addrspace(1) pointer.
; The checks expect the value to be split across two registers: a 32-bit
; load into v0 and an unsigned 16-bit load into v1.
define i48 @i48_func_void() #0 {
; GFX789-LABEL: i48_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT: buffer_load_ushort v1, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i48, ptr addrspace(1) undef, align 8
ret i48 %val
}
; zeroext i48 return of an align-8 load from an undef addrspace(1) pointer.
; The checks expect a 32-bit load into v0 and an unsigned 16-bit load into
; v1, with no additional masking of v1.
define zeroext i48 @i48_zeroext_func_void() #0 {
; GFX789-LABEL: i48_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT: buffer_load_ushort v1, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i48, ptr addrspace(1) undef, align 8
ret i48 %val
}
; signext i48 return of an align-8 load from an undef addrspace(1) pointer.
; The checks expect a 32-bit load into v0 and a signed 16-bit load into v1,
; so the sign extension comes from the load itself.
define signext i48 @i48_signext_func_void() #0 {
; GFX789-LABEL: i48_signext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT: buffer_load_sshort v1, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: buffer_load_i16 v1, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i48, ptr addrspace(1) undef, align 8
ret i48 %val
}
; Returns its i63 argument unchanged (no load, despite the _func_void name).
; The checks expect no instructions between the entry wait and the return.
define i63 @i63_func_void(i63 %val) #0 {
; GFX789-LABEL: i63_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
}
; Returns its i63 argument with zeroext. The checks expect the top bit of
; the high register to be cleared by and-ing v1 with 0x7fffffff.
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
; GFX789-LABEL: i63_zeroext_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_zeroext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
}
; Returns its i63 argument with signext. The checks expect a 64-bit shift
; left by 1 followed by a 64-bit arithmetic shift right by 1 on v[0:1].
; The hawaii run uses v_lshl_b64/v_ashr_i64, the other runs use the "rev"
; operand-order forms, and the gfx1100 run also has an s_delay_alu between
; the two shifts.
define signext i63 @i63_signext_func_void(i63 %val) #0 {
; CI-LABEL: i63_signext_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: i63_signext_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_signext_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret i63 %val
}
; i64 return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 64-bit load into the register pair v[0:1].
define i64 @i64_func_void() #0 {
; GFX789-LABEL: i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i64, ptr addrspace(1) undef
ret i64 %val
}
; i65 return of a value loaded from an undef addrspace(1) pointer.
; The checks expect the value in three registers: a 64-bit load into
; v[0:1] and an unsigned byte load into v2.
define i65 @i65_func_void() #0 {
; GFX789-LABEL: i65_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT: buffer_load_ubyte v2, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i65_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: buffer_load_u8 v2, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load i65, ptr addrspace(1) undef
ret i65 %val
}
; float return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 32-bit load into v0.
define float @f32_func_void() #0 {
; GFX789-LABEL: f32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: f32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load float, ptr addrspace(1) undef
ret float %val
}
; double return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 64-bit load into the register pair v[0:1].
define double @f64_func_void() #0 {
; GFX789-LABEL: f64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: f64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load double, ptr addrspace(1) undef
ret double %val
}
; <2 x double> return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 128-bit load into v[0:3].
define <2 x double> @v2f64_func_void() #0 {
; GFX789-LABEL: v2f64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2f64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x double>, ptr addrspace(1) undef
ret <2 x double> %val
}
; <2 x i32> return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 64-bit load into v[0:1].
define <2 x i32> @v2i32_func_void() #0 {
; GFX789-LABEL: v2i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i32>, ptr addrspace(1) undef
ret <2 x i32> %val
}
; <3 x i32> return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 96-bit load into v[0:2] for every run line,
; including the hawaii one.
define <3 x i32> @v3i32_func_void() #0 {
; GFX789-LABEL: v3i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx3 v[0:2], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x i32>, ptr addrspace(1) undef
ret <3 x i32> %val
}
; <4 x i32> return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 128-bit load into v[0:3].
define <4 x i32> @v4i32_func_void() #0 {
; GFX789-LABEL: v4i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x i32>, ptr addrspace(1) undef
ret <4 x i32> %val
}
; <5 x i32> return of a volatile load from an undef addrspace(1) pointer.
; The checks expect two loads, a 32-bit one into v4 and a 128-bit one into
; v[0:3], each followed by its own vmcnt(0) wait and each carrying glc
; (glc dlc for the gfx1100 run).
define <5 x i32> @v5i32_func_void() #0 {
; GFX789-LABEL: v5i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dword v4, off, s[4:7], 0 glc
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 glc
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v4, off, s[0:3], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load volatile <5 x i32>, ptr addrspace(1) undef
ret <5 x i32> %val
}
; <8 x i32> return. The addrspace(1) pointer is itself obtained through a
; volatile load from an undef addrspace(4) pointer, which the checks show
; as a scalar 64-bit load followed by an lgkmcnt(0) wait. The vector is
; then read with two 128-bit loads at offsets 0 and 16 into v[0:7].
define <8 x i32> @v8i32_func_void() #0 {
; GFX789-LABEL: v8i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <8 x i32>, ptr addrspace(1) %ptr
ret <8 x i32> %val
}
; <16 x i32> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with four 128-bit loads at offsets 0, 16, 32 and 48
; into v[0:15].
define <16 x i32> @v16i32_func_void() #0 {
; GFX789-LABEL: v16i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <16 x i32>, ptr addrspace(1) %ptr
ret <16 x i32> %val
}
; <32 x i32> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with eight 128-bit loads at offsets 0 through 112
; into v[0:31].
define <32 x i32> @v32i32_func_void() #0 {
; GFX789-LABEL: v32i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX789-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX789-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX789-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v32i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x7
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <32 x i32>, ptr addrspace(1) %ptr
ret <32 x i32> %val
}
; <2 x i64> return of a value loaded from an undef addrspace(1) pointer.
; The checks expect a single 128-bit load into v[0:3].
define <2 x i64> @v2i64_func_void() #0 {
; GFX789-LABEL: v2i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i64>, ptr addrspace(1) undef
ret <2 x i64> %val
}
; <3 x i64> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with a 128-bit load into v[0:3] and a 64-bit load at
; offset 16 into v[4:5].
define <3 x i64> @v3i64_func_void() #0 {
; GFX789-LABEL: v3i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 offset:16
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <3 x i64>, ptr addrspace(1) %ptr
ret <3 x i64> %val
}
; <4 x i64> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with two 128-bit loads at offsets 0 and 16 into
; v[0:7].
define <4 x i64> @v4i64_func_void() #0 {
; GFX789-LABEL: v4i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <4 x i64>, ptr addrspace(1) %ptr
ret <4 x i64> %val
}
; <5 x i64> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with two 128-bit loads (offsets 0 and 16) and one
; 64-bit load (offset 32), filling v[0:9].
define <5 x i64> @v5i64_func_void() #0 {
; GFX789-LABEL: v5i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: buffer_load_dwordx2 v[8:9], off, s[4:7], 0 offset:32
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x2
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b64 v[8:9], off, s[0:3], 0 offset:32
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <5 x i64>, ptr addrspace(1) %ptr
ret <5 x i64> %val
}
; <8 x i64> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with four 128-bit loads at offsets 0, 16, 32 and 48
; into v[0:15].
define <8 x i64> @v8i64_func_void() #0 {
; GFX789-LABEL: v8i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <8 x i64>, ptr addrspace(1) %ptr
ret <8 x i64> %val
}
; <16 x i64> return. The addrspace(1) pointer comes from a volatile load
; of an undef addrspace(4) pointer (a scalar load in the checks). The
; vector is then read with eight 128-bit loads at offsets 0 through 112
; into v[0:31].
define <16 x i64> @v16i64_func_void() #0 {
; GFX789-LABEL: v16i64_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX789-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX789-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX789-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i64_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x7
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <16 x i64>, ptr addrspace(1) %ptr
ret <16 x i64> %val
}
; <2 x i16> return of a value loaded from an undef addrspace(1) pointer.
; Every run loads one 32-bit word into v0. The hawaii checks additionally
; expect v1 = v0 >> 16, so each element ends up in its own register; the
; other runs return the loaded dword in v0 as-is.
define <2 x i16> @v2i16_func_void() #0 {
; CI-LABEL: v2i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v2i16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i16>, ptr addrspace(1) undef
ret <2 x i16> %val
}
; <3 x i16> return of a value loaded from an undef addrspace(1) pointer.
; Every run issues one 64-bit load. The hawaii checks load into v[2:3] and
; then rebuild the result in v0, v1 and v2 (v1 via v_alignbit_b32 by 16);
; the other runs return the loaded pair v[0:1] as-is.
define <3 x i16> @v3i16_func_void() #0 {
; CI-LABEL: v3i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16
; CI-NEXT: v_mov_b32_e32 v0, v2
; CI-NEXT: v_mov_b32_e32 v2, v3
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v3i16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x i16>, ptr addrspace(1) undef
ret <3 x i16> %val
}
; Return a <4 x i16> loaded from an undef addrspace(1) pointer.
; Expected lowering, per the checks below: hawaii shifts the high halves of
; the two loaded dwords out so that v0..v3 are all written, while fiji, gfx900
; and gfx1100 return the 64-bit load result in v[0:1] as loaded.
define <4 x i16> @v4i16_func_void() #0 {
; CI-LABEL: v4i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; CI-NEXT: v_mov_b32_e32 v2, v1
; CI-NEXT: v_mov_b32_e32 v1, v4
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4i16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; The loaded vector is returned directly; no other IR operations are involved.
%val = load <4 x i16>, ptr addrspace(1) undef
ret <4 x i16> %val
}
; Return a <4 x half> loaded from an undef addrspace(1) pointer.
; Expected lowering, per the checks below: hawaii converts every half element
; to f32 with v_cvt_f32_f16 and writes the results to v0..v3, while fiji,
; gfx900 and gfx1100 return the 64-bit load result in v[0:1] as loaded.
define <4 x half> @v4f16_func_void() #0 {
; CI-LABEL: v4f16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx2 v[3:4], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f32_f16_e32 v0, v3
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v4
; CI-NEXT: v_cvt_f32_f16_e32 v2, v4
; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4f16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4f16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; The loaded vector is returned directly; no other IR operations are involved.
%val = load <4 x half>, ptr addrspace(1) undef
ret <4 x half> %val
}
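; FIXME: Mixing buffer and global
; NOTE(review): none of the expected sequences for v5i16_func_void below contain
; a global_* instruction, so the FIXME above may be stale -- confirm before removing.
; FIXME: Should not scalarize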
; Return a <5 x i16> loaded through an addrspace(1) pointer that is itself
; obtained by a volatile load from an undef addrspace(4) address.
; Expected lowering, per the checks below: hawaii issues a 64-bit load plus a
; separate signed 16-bit load at offset 8 for the fifth element and writes
; v0..v4, while fiji, gfx900 and gfx1100 issue one 128-bit load into v[0:3].
define <5 x i16> @v5i16_func_void() #0 {
; CI-LABEL: v5i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT: buffer_load_sshort v4, off, s[4:7], 0 offset:8
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: v_alignbit_b32 v5, v1, v0, 16
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; CI-NEXT: v_mov_b32_e32 v2, v1
; CI-NEXT: v_mov_b32_e32 v1, v5
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v5i16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Fetch the base pointer first (this is the s_load in the checks above), then
; load the vector through it.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <5 x i16>, ptr addrspace(1) %ptr
ret <5 x i16> %val
}
; Return an <8 x i16> loaded through an addrspace(1) pointer that is itself
; obtained by a volatile load from an undef addrspace(4) address.
; Expected lowering, per the checks below: hawaii loads 128 bits into v[8:11]
; and then writes v0..v7 using 16-bit shifts and moves, while fiji, gfx900 and
; gfx1100 load straight into v[0:3] and return.
define <8 x i16> @v8i16_func_void() #0 {
; CI-LABEL: v8i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v8
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v9
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v10
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v11
; CI-NEXT: v_mov_b32_e32 v0, v8
; CI-NEXT: v_mov_b32_e32 v2, v9
; CI-NEXT: v_mov_b32_e32 v4, v10
; CI-NEXT: v_mov_b32_e32 v6, v11
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v8i16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Fetch the base pointer first (this is the s_load in the checks above), then
; load the vector through it.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <8 x i16>, ptr addrspace(1) %ptr
ret <8 x i16> %val
}
; Return a <16 x i16> loaded through an addrspace(1) pointer that is itself
; obtained by a volatile load from an undef addrspace(4) address.
; Expected lowering, per the checks below: hawaii issues two 128-bit loads
; into v[18:25] and then writes v0..v15 using 16-bit shifts and moves, while
; fiji, gfx900 and gfx1100 load straight into v[0:7] and return.
define <16 x i16> @v16i16_func_void() #0 {
; CI-LABEL: v16i16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[22:25], off, s[4:7], 0
; CI-NEXT: buffer_load_dwordx4 v[18:21], off, s[4:7], 0 offset:16
; CI-NEXT: s_waitcnt vmcnt(1)
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v22
; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v23
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v24
; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v25
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v18
; CI-NEXT: v_lshrrev_b32_e32 v11, 16, v19
; CI-NEXT: v_lshrrev_b32_e32 v13, 16, v20
; CI-NEXT: v_lshrrev_b32_e32 v15, 16, v21
; CI-NEXT: v_mov_b32_e32 v0, v22
; CI-NEXT: v_mov_b32_e32 v2, v23
; CI-NEXT: v_mov_b32_e32 v4, v24
; CI-NEXT: v_mov_b32_e32 v6, v25
; CI-NEXT: v_mov_b32_e32 v8, v18
; CI-NEXT: v_mov_b32_e32 v10, v19
; CI-NEXT: v_mov_b32_e32 v12, v20
; CI-NEXT: v_mov_b32_e32 v14, v21
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v16i16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Fetch the base pointer first (this is the s_load in the checks above), then
; load the vector through it.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <16 x i16>, ptr addrspace(1) %ptr
ret <16 x i16> %val
}
; FIXME: Should pack the i8 elements instead of returning each one in its own VGPR (v0..v15).
; Return a <16 x i8> loaded through an addrspace(1) pointer that is itself
; obtained by a volatile load from an undef addrspace(4) address.
; Expected lowering, per the checks below, on every tested target: a single
; 128-bit load into v[0:3], after which shifts by 8/16/24 and register moves
; leave v0..v15 all written. gfx1100 pairs some of the moves with v_dual_mov.
define <16 x i8> @v16i8_func_void() #0 {
; GFX789-LABEL: v16i8_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: v_lshrrev_b32_e32 v16, 8, v0
; GFX789-NEXT: v_lshrrev_b32_e32 v17, 16, v0
; GFX789-NEXT: v_lshrrev_b32_e32 v18, 24, v0
; GFX789-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX789-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX789-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX789-NEXT: v_lshrrev_b32_e32 v9, 8, v2
; GFX789-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX789-NEXT: v_lshrrev_b32_e32 v11, 24, v2
; GFX789-NEXT: v_lshrrev_b32_e32 v13, 8, v3
; GFX789-NEXT: v_lshrrev_b32_e32 v14, 16, v3
; GFX789-NEXT: v_lshrrev_b32_e32 v15, 24, v3
; GFX789-NEXT: v_mov_b32_e32 v4, v1
; GFX789-NEXT: v_mov_b32_e32 v8, v2
; GFX789-NEXT: v_mov_b32_e32 v12, v3
; GFX789-NEXT: v_mov_b32_e32 v1, v16
; GFX789-NEXT: v_mov_b32_e32 v2, v17
; GFX789-NEXT: v_mov_b32_e32 v3, v18
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i8_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3
; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3
; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3
; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16
; GFX11-NEXT: v_mov_b32_e32 v8, v2
; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
; GFX11-NEXT: v_mov_b32_e32 v2, v17
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Fetch the base pointer first (this is the s_load in the checks above), then
; load the vector through it.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <16 x i8>, ptr addrspace(1) %ptr
ret <16 x i8> %val
}
; FIXME: Should pack the i8 elements instead of returning each one in its own VGPR (v0..v3).
; Return a <4 x i8> loaded through an addrspace(1) pointer that is itself
; obtained by a volatile load from an undef addrspace(4) address.
; Expected lowering, per the checks below, on every tested target: one dword
; load into v0 followed by shifts by 8, 16 and 24 that write v1, v2 and v3.
; Only the order of the shift instructions differs between the two check groups.
define <4 x i8> @v4i8_func_void() #0 {
; GFX789-LABEL: v4i8_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: s_waitcnt lgkmcnt(0)
; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX789-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX789-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i8_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Fetch the base pointer first (this is the s_load in the checks above), then
; load the vector through it.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <4 x i8>, ptr addrspace(1) %ptr
ret <4 x i8> %val
}
; Return a { i8, i32 } aggregate loaded from an undef addrspace(1) pointer.
; Expected lowering, per the checks below, on every tested target: an unsigned
; byte load into v0 and a dword load into v1, with no stores.
; NOTE(review): both loads are emitted without an immediate offset, presumably
; an artifact of the undef base address -- confirm.
define {i8, i32} @struct_i8_i32_func_void() #0 {
; GFX789-LABEL: struct_i8_i32_func_void:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT: buffer_load_dword v1, off, s[4:7], 0
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_i8_i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; The whole aggregate is loaded with one IR load and returned by value.
%val = load { i8, i32 }, ptr addrspace(1) undef
ret { i8, i32 } %val
}
; Write a { i8, i32 } result through an explicit sret pointer in addrspace(5)
; instead of returning it by value.
; %arg0 is the caller-provided result slot; both fields are filled from
; volatile loads of an undef addrspace(1) pointer.
; Expected lowering, per the checks below: each volatile load is followed by
; its own vmcnt wait, then the byte is stored at the address in v0 and the
; dword at that address plus 4. hawaii, fiji and gfx900 use buffer stores with
; offen, while gfx1100 uses scratch stores.
define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 {
; GFX789-LABEL: void_func_sret_struct_i8_i32:
; GFX789: ; %bb.0:
; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT: s_mov_b32 s7, 0xf000
; GFX789-NEXT: s_mov_b32 s6, -1
; GFX789-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 glc
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen
; GFX789-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; GFX789-NEXT: s_waitcnt vmcnt(0)
; GFX789-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_sret_struct_i8_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u8 v1, off, s[0:3], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_b32 v2, off, s[0:3], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b8 v0, v1, off
; GFX11-NEXT: scratch_store_b32 v0, v2, off offset:4
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Produce the two field values with volatile loads.
%val0 = load volatile i8, ptr addrspace(1) undef
%val1 = load volatile i32, ptr addrspace(1) undef
; Address struct field 0 (the i8) and field 1 (the i32) of the sret slot.
%gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
; Store both fields into the result slot.
store i8 %val0, ptr addrspace(5) %gep0
store i32 %val1, ptr addrspace(5) %gep1
ret void
}
; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.
; Return a <33 x i32> loaded through an addrspace(1) pointer that is itself
; obtained by a volatile load from an undef addrspace(4) address.
; In every expected sequence below, all 33 dwords are written to memory
; addressed relative to v0 rather than being left in registers.
; NOTE(review): presumably v0 carries a compiler-introduced return-slot pointer
; because the value is too large to return in registers -- confirm against
; the calling convention.
; hawaii and fiji compute each store address with a separate VALU add, gfx900
; uses immediate offsets on the stores, and gfx1100 uses 128-bit scratch
; stores plus one 32-bit scratch store for the last element.
define <33 x i32> @v33i32_func_void() #0 {
; CI-LABEL: v33i32_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_add_i32_e32 v34, vcc, 0x80, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128
; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v33, vcc, 0x7c, v0
; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v4, vcc, 0x78, v0
; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v3, vcc, 0x74, v0
; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x70, v0
; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x6c, v0
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0
; CI-NEXT: v_add_i32_e32 v3, vcc, 0x64, v0
; CI-NEXT: s_waitcnt vmcnt(11)
; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x60, v0
; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0
; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v3, vcc, 0x58, v0
; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x54, v0
; CI-NEXT: v_add_i32_e32 v4, vcc, 0x50, v0
; CI-NEXT: v_add_i32_e32 v5, vcc, 0x4c, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
; CI-NEXT: v_add_i32_e32 v6, vcc, 0x48, v0
; CI-NEXT: v_add_i32_e32 v7, vcc, 0x44, v0
; CI-NEXT: v_add_i32_e32 v2, vcc, 64, v0
; CI-NEXT: v_add_i32_e32 v3, vcc, 60, v0
; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v4, vcc, 52, v0
; CI-NEXT: v_add_i32_e32 v8, vcc, 48, v0
; CI-NEXT: v_add_i32_e32 v9, vcc, 44, v0
; CI-NEXT: v_add_i32_e32 v10, vcc, 40, v0
; CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 32, v0
; CI-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 28, v0
; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 24, v0
; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 20, v0
; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 16, v0
; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 12, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 8, v0
; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 4, v0
; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v33i32_func_void:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT: s_mov_b32 s7, 0xf000
; GFX8-NEXT: s_mov_b32 s6, -1
; GFX8-NEXT: v_add_u32_e32 v34, vcc, 0x80, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX8-NEXT: s_waitcnt vmcnt(8)
; GFX8-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0x7c, v0
; GFX8-NEXT: s_waitcnt vmcnt(8)
; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x78, v0
; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x74, v0
; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0
; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x6c, v0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x64, v0
; GFX8-NEXT: s_waitcnt vmcnt(11)
; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x60, v0
; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0
; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x58, v0
; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x54, v0
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x50, v0
; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x4c, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 56, v0
; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x48, v0
; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x44, v0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 64, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 60, v0
; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 52, v0
; GFX8-NEXT: v_add_u32_e32 v8, vcc, 48, v0
; GFX8-NEXT: v_add_u32_e32 v9, vcc, 44, v0
; GFX8-NEXT: v_add_u32_e32 v10, vcc, 40, v0
; GFX8-NEXT: v_add_u32_e32 v11, vcc, 36, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 32, v0
; GFX8-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 28, v0
; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 20, v0
; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v0
; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 12, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 8, v0
; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 4, v0
; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v33i32_func_void:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(8)
; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT: s_waitcnt vmcnt(11)
; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT: s_waitcnt vmcnt(14)
; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT: s_waitcnt vmcnt(17)
; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT: s_waitcnt vmcnt(17)
; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT: s_waitcnt vmcnt(20)
; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT: s_waitcnt vmcnt(23)
; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT: s_waitcnt vmcnt(26)
; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT: s_waitcnt vmcnt(29)
; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v33i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112
; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96
; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Fetch the base pointer first (this is the s_load in the checks above), then
; load the 33-element vector through it and return it by value.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <33 x i32>, ptr addrspace(1) %ptr
ret <33 x i32> %val
}
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
; CI-LABEL: struct_v32i32_i32_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: v_add_i32_e32 v34, vcc, 0x80, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128
; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v33, vcc, 0x7c, v0
; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v4, vcc, 0x78, v0
; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v3, vcc, 0x74, v0
; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x70, v0
; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x6c, v0
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0
; CI-NEXT: v_add_i32_e32 v3, vcc, 0x64, v0
; CI-NEXT: s_waitcnt vmcnt(11)
; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x60, v0
; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0
; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v3, vcc, 0x58, v0
; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x54, v0
; CI-NEXT: v_add_i32_e32 v4, vcc, 0x50, v0
; CI-NEXT: v_add_i32_e32 v5, vcc, 0x4c, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
; CI-NEXT: v_add_i32_e32 v6, vcc, 0x48, v0
; CI-NEXT: v_add_i32_e32 v7, vcc, 0x44, v0
; CI-NEXT: v_add_i32_e32 v2, vcc, 64, v0
; CI-NEXT: v_add_i32_e32 v3, vcc, 60, v0
; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v4, vcc, 52, v0
; CI-NEXT: v_add_i32_e32 v8, vcc, 48, v0
; CI-NEXT: v_add_i32_e32 v9, vcc, 44, v0
; CI-NEXT: v_add_i32_e32 v10, vcc, 40, v0
; CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 32, v0
; CI-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 28, v0
; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 24, v0
; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 20, v0
; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 16, v0
; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 12, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 8, v0
; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 4, v0
; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: struct_v32i32_i32_func_void:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT: s_mov_b32 s7, 0xf000
; GFX8-NEXT: s_mov_b32 s6, -1
; GFX8-NEXT: v_add_u32_e32 v34, vcc, 0x80, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX8-NEXT: s_waitcnt vmcnt(8)
; GFX8-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0x7c, v0
; GFX8-NEXT: s_waitcnt vmcnt(8)
; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x78, v0
; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x74, v0
; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0
; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x6c, v0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x64, v0
; GFX8-NEXT: s_waitcnt vmcnt(11)
; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x60, v0
; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0
; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x58, v0
; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x54, v0
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x50, v0
; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x4c, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 56, v0
; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x48, v0
; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x44, v0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 64, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 60, v0
; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 52, v0
; GFX8-NEXT: v_add_u32_e32 v8, vcc, 48, v0
; GFX8-NEXT: v_add_u32_e32 v9, vcc, 44, v0
; GFX8-NEXT: v_add_u32_e32 v10, vcc, 40, v0
; GFX8-NEXT: v_add_u32_e32 v11, vcc, 36, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 32, v0
; GFX8-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 28, v0
; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 20, v0
; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v0
; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 12, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 8, v0
; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 4, v0
; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: struct_v32i32_i32_func_void:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX9-NEXT: s_waitcnt vmcnt(8)
; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT: s_waitcnt vmcnt(11)
; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT: s_waitcnt vmcnt(14)
; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT: s_waitcnt vmcnt(17)
; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT: s_waitcnt vmcnt(17)
; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT: s_waitcnt vmcnt(20)
; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT: s_waitcnt vmcnt(23)
; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT: s_waitcnt vmcnt(26)
; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT: s_waitcnt vmcnt(29)
; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_v32i32_i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112
; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96
; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr
ret { <32 x i32>, i32 }%val
}
; Return a { i32, <32 x i32> } struct by value. The struct is loaded from a
; global (addrspace 1) pointer that is itself obtained with a volatile load
; from an undef constant (addrspace 4) pointer.
; In the expected output for every target the components are written to
; memory addressed relative to v0: the leading i32 at offset 0 and the 32
; vector elements at offsets 128 through 252.
; NOTE(review): v0 is presumably the implicit return-slot pointer - confirm
; against the AMDGPU calling convention.
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
; CI-LABEL: struct_i32_v32i32_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0
; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v33, vcc, 0xfc, v0
; CI-NEXT: s_waitcnt vmcnt(8)
; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v4, vcc, 0xf8, v0
; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v3, vcc, 0xf4, v0
; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v2, vcc, 0xf0, v0
; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0xec, v0
; CI-NEXT: v_add_i32_e32 v2, vcc, 0xe8, v0
; CI-NEXT: v_add_i32_e32 v3, vcc, 0xe4, v0
; CI-NEXT: s_waitcnt vmcnt(11)
; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0xe0, v0
; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v2, vcc, 0xdc, v0
; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v3, vcc, 0xd8, v0
; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0xd4, v0
; CI-NEXT: v_add_i32_e32 v4, vcc, 0xd0, v0
; CI-NEXT: v_add_i32_e32 v5, vcc, 0xcc, v0
; CI-NEXT: v_add_i32_e32 v6, vcc, 0xc8, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0xb8, v0
; CI-NEXT: v_add_i32_e32 v7, vcc, 0xc4, v0
; CI-NEXT: v_add_i32_e32 v2, vcc, 0xc0, v0
; CI-NEXT: v_add_i32_e32 v3, vcc, 0xbc, v0
; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v4, vcc, 0xb4, v0
; CI-NEXT: v_add_i32_e32 v8, vcc, 0xb0, v0
; CI-NEXT: v_add_i32_e32 v9, vcc, 0xac, v0
; CI-NEXT: v_add_i32_e32 v10, vcc, 0xa8, v0
; CI-NEXT: v_add_i32_e32 v11, vcc, 0xa4, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v5, vcc, 0xa0, v0
; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v21, v5, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x9c, v0
; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x98, v0
; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x94, v0
; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x90, v0
; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x8c, v0
; CI-NEXT: s_waitcnt vmcnt(14)
; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x88, v0
; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
; CI-NEXT: v_add_i32_e32 v1, vcc, 0x84, v0
; CI-NEXT: v_add_i32_e32 v0, vcc, 0x80, v0
; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: struct_i32_v32i32_func_void:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT: s_mov_b32 s7, 0xf000
; GFX8-NEXT: s_mov_b32 s6, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0
; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
; GFX8-NEXT: s_waitcnt vmcnt(8)
; GFX8-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0xfc, v0
; GFX8-NEXT: s_waitcnt vmcnt(8)
; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xf8, v0
; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xf4, v0
; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xf0, v0
; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xec, v0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xe8, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xe4, v0
; GFX8-NEXT: s_waitcnt vmcnt(11)
; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xe0, v0
; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xdc, v0
; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xd8, v0
; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xd4, v0
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xd0, v0
; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0xcc, v0
; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0xc8, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xb8, v0
; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0xc4, v0
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xc0, v0
; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xbc, v0
; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xb4, v0
; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0xb0, v0
; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xac, v0
; GFX8-NEXT: v_add_u32_e32 v10, vcc, 0xa8, v0
; GFX8-NEXT: v_add_u32_e32 v11, vcc, 0xa4, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0xa0, v0
; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v21, v5, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x9c, v0
; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x98, v0
; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x94, v0
; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x90, v0
; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x8c, v0
; GFX8-NEXT: s_waitcnt vmcnt(14)
; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x88, v0
; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen
; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x84, v0
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0
; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen
; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: struct_i32_v32i32_func_void:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0
; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
; GFX9-NEXT: s_waitcnt vmcnt(8)
; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:252
; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:244
; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX9-NEXT: s_waitcnt vmcnt(11)
; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:236
; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:232
; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:228
; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:224
; GFX9-NEXT: s_waitcnt vmcnt(14)
; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:220
; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:216
; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:212
; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:208
; GFX9-NEXT: s_waitcnt vmcnt(17)
; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen
; GFX9-NEXT: s_waitcnt vmcnt(17)
; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:204
; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:200
; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:196
; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:192
; GFX9-NEXT: s_waitcnt vmcnt(20)
; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:188
; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:184
; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:180
; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:176
; GFX9-NEXT: s_waitcnt vmcnt(23)
; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:172
; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:168
; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:164
; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:160
; GFX9-NEXT: s_waitcnt vmcnt(26)
; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:156
; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:152
; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:148
; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:144
; GFX9-NEXT: s_waitcnt vmcnt(29)
; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:140
; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:136
; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:132
; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_i32_v32i32_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x8
; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:240
; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:224
; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:208
; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:192
; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:176
; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:160
; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144
; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128
; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:224
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: scratch_store_b32 v0, v33, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Test body: fetch the source pointer with a volatile load, load the whole
; struct through it with a single aggregate load, and return it by value.
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr
ret { i32, <32 x i32> }%val
}
; Make sure the last struct component is returned in v3, not v4.
; Builds a { <3 x i32>, i32 } from four volatile i32 loads off an undef
; addrspace(3) pointer and returns it by value. The expected output places
; the three vector elements in v0, v1 and v2 and the trailing scalar in v3.
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
; CI-LABEL: v3i32_struct_func_void_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: ds_read_b32 v1, v0
; CI-NEXT: ds_read_b32 v2, v0
; CI-NEXT: ds_read_b32 v3, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_read_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ds_read_b32 v1, v0
; GFX8-NEXT: ds_read_b32 v2, v0
; GFX8-NEXT: ds_read_b32 v3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v1, v0
; GFX9-NEXT: ds_read_b32 v2, v0
; GFX9-NEXT: ds_read_b32 v3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v1, v0
; GFX11-NEXT: ds_load_b32 v2, v0
; GFX11-NEXT: ds_load_b32 v3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Four separate volatile loads, so each one appears as its own ds load in
; the output; %load0..%load2 fill the vector, %load3 is the scalar member.
%load0 = load volatile i32, ptr addrspace(3) undef
%load1 = load volatile i32, ptr addrspace(3) undef
%load2 = load volatile i32, ptr addrspace(3) undef
%load3 = load volatile i32, ptr addrspace(3) undef
%insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
%insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
%insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
%insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
%insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
ret { <3 x i32>, i32 } %insert.4
}
; Floating-point variant of the wasted-register check: builds a
; { <3 x float>, i32 } from three volatile float loads and one volatile i32
; load off an undef addrspace(3) pointer. The expected output places the
; three vector elements in v0, v1 and v2 and the trailing i32 in v3.
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
; CI-LABEL: v3f32_struct_func_void_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: ds_read_b32 v1, v0
; CI-NEXT: ds_read_b32 v2, v0
; CI-NEXT: ds_read_b32 v3, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_read_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ds_read_b32 v1, v0
; GFX8-NEXT: ds_read_b32 v2, v0
; GFX8-NEXT: ds_read_b32 v3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v1, v0
; GFX9-NEXT: ds_read_b32 v2, v0
; GFX9-NEXT: ds_read_b32 v3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v1, v0
; GFX11-NEXT: ds_load_b32 v2, v0
; GFX11-NEXT: ds_load_b32 v3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; %load0..%load2 are the float vector lanes; %load3 is the integer member
; that follows the vector in the returned struct.
%load0 = load volatile float, ptr addrspace(3) undef
%load1 = load volatile float, ptr addrspace(3) undef
%load2 = load volatile float, ptr addrspace(3) undef
%load3 = load volatile i32, ptr addrspace(3) undef
%insert.0 = insertelement <3 x float> undef, float %load0, i32 0
%insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
%insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
%insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
%insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
ret { <3 x float>, i32 } %insert.4
}
; Takes an explicit sret pointer in addrspace(5), converts it to an i32 and
; stores that value shifted right by 16, 17 and 18 bits with volatile stores
; to an undef addrspace(3) pointer.
; In the expected output for CI, GFX8 and GFX9 only the shift by 16 is
; computed and the other two stored values are the constant 0. For GFX11 the
; shifts by 16 and 17 are both computed and only the third store uses 0.
; NOTE(review): the folding presumably comes from the number of high bits
; known to be zero in an sret/private pointer, which appears to differ by
; one bit on GFX11 - confirm against the backend.
define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 {
; CI-LABEL: void_func_sret_max_known_zero_bits:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: void_func_sret_max_known_zero_bits:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_sret_max_known_zero_bits:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_sret_max_known_zero_bits:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 17, v0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
; Three increasing shift amounts probe where the shifted pointer value
; becomes a compile-time zero; the volatile stores keep all three results
; visible in the output, in order.
%arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32
%lshr0 = lshr i32 %arg0.int, 16
%lshr1 = lshr i32 %arg0.int, 17
%lshr2 = lshr i32 %arg0.int, 18
store volatile i32 %lshr0, ptr addrspace(3) undef
store volatile i32 %lshr1, ptr addrspace(3) undef
store volatile i32 %lshr2, ptr addrspace(3) undef
ret void
}
; Returns a single bfloat loaded from an undef global (addrspace 1) pointer.
; The expected CI output shifts the loaded 16-bit value left by 16 before
; returning it in v0; the shared GFX8/GFX9 expectations and the GFX11
; expectations return the loaded 16-bit value in v0 without a shift.
define bfloat @bf16_func_void() #0 {
; CI-LABEL: bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load bfloat, ptr addrspace(1) undef
ret bfloat %val
}
; Returns a <2 x bfloat> loaded as one dword from an undef global
; (addrspace 1) pointer. The expected CI output splits the dword into two
; registers: v0 gets the low half shifted left by 16 and v1 gets the high
; half masked with 0xffff0000. The other targets return the packed dword
; in v0 unchanged.
define <2 x bfloat> @v2bf16_func_void() #0 {
; CI-LABEL: v2bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dword v1, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v1
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v2bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x bfloat>, ptr addrspace(1) undef
ret <2 x bfloat> %val
}
; Returns a <3 x bfloat> (odd element count) loaded from an undef
; addrspace(1) pointer.
; Expected output (autogenerated):
;  - hawaii: a two-dword load into v[1:2], then the three elements are
;    unpacked into the upper halves of v0, v1 and v2 (shift / mask / shift).
;  - fiji / gfx900 / gfx1100: a two-dword load directly into v[0:1] with no
;    further unpacking.
define <3 x bfloat> @v3bf16_func_void() #0 {
; CI-LABEL: v3bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v1
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v3bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x bfloat>, ptr addrspace(1) undef
ret <3 x bfloat> %val
}
; Returns a <4 x bfloat> loaded from an undef addrspace(1) pointer.
; Expected output (autogenerated):
;  - hawaii: a two-dword load into v[2:3], then each of the four elements is
;    placed in the upper half of its own VGPR v0-v3 (alternating shift-by-16
;    and 0xffff0000 mask).
;  - fiji / gfx900 / gfx1100: a two-dword load directly into v[0:1].
define <4 x bfloat> @v4bf16_func_void() #0 {
; CI-LABEL: v4bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v2
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x bfloat>, ptr addrspace(1) undef
ret <4 x bfloat> %val
}
; Returns a <6 x bfloat> loaded from an undef addrspace(1) pointer.
; Expected output (autogenerated):
;  - hawaii: a three-dword load into v[3:5], then the six elements are
;    unpacked into the upper halves of v0-v5 (alternating shift-by-16 and
;    0xffff0000 mask).
;  - fiji / gfx900 / gfx1100: a three-dword load directly into v[0:2].
define <6 x bfloat> @v6bf16_func_void() #0 {
; CI-LABEL: v6bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx3 v[3:5], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v3
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v3
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v4
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v5
; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v5
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v6bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx3 v[0:2], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v6bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <6 x bfloat>, ptr addrspace(1) undef
ret <6 x bfloat> %val
}
; Returns an <8 x bfloat> loaded from an undef addrspace(1) pointer.
; Expected output (autogenerated):
;  - hawaii: a four-dword load into v[4:7], then the eight elements are
;    unpacked into the upper halves of v0-v7 (alternating shift-by-16 and
;    0xffff0000 mask).
;  - fiji / gfx900 / gfx1100: a four-dword load directly into v[0:3].
define <8 x bfloat> @v8bf16_func_void() #0 {
; CI-LABEL: v8bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v8bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <8 x bfloat>, ptr addrspace(1) undef
ret <8 x bfloat> %val
}
; Returns a <16 x bfloat> loaded from an undef addrspace(1) pointer.
; Expected output (autogenerated):
;  - hawaii: a single four-dword load, unpacked into v0-v7 as in the
;    8-element case, then v0-v7 are copied into v8-v15.
;  - fiji / gfx900 / gfx1100: a single four-dword load into v[0:3], then
;    v0-v3 are copied into v4-v7.
; NOTE(review): only one 128-bit load appears in the expected output and the
; second half of the result is a copy of the first. Presumably this is a
; consequence of loading through an undef pointer - confirm that is intended,
; since distinct loads for each half are not exercised here.
define <16 x bfloat> @v16bf16_func_void() #0 {
; CI-LABEL: v16bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; CI-NEXT: v_mov_b32_e32 v8, v0
; CI-NEXT: v_mov_b32_e32 v9, v1
; CI-NEXT: v_mov_b32_e32 v10, v2
; CI-NEXT: v_mov_b32_e32 v11, v3
; CI-NEXT: v_mov_b32_e32 v12, v4
; CI-NEXT: v_mov_b32_e32 v13, v5
; CI-NEXT: v_mov_b32_e32 v14, v6
; CI-NEXT: v_mov_b32_e32 v15, v7
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v16bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: v_mov_b32_e32 v4, v0
; GFX89-NEXT: v_mov_b32_e32 v5, v1
; GFX89-NEXT: v_mov_b32_e32 v6, v2
; GFX89-NEXT: v_mov_b32_e32 v7, v3
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <16 x bfloat>, ptr addrspace(1) undef
ret <16 x bfloat> %val
}
; Returns a <32 x bfloat> loaded from an undef addrspace(1) pointer.
; Expected output (autogenerated):
;  - hawaii: a single four-dword load, unpacked into v0-v7 as in the
;    8-element case, then v0-v7 are copied three more times to fill v8-v31
;    (the copies are emitted out of register order).
;  - fiji / gfx900 / gfx1100: a single four-dword load into v[0:3], then
;    v0-v3 are copied three more times to fill v4-v15 (gfx1100 pairs the
;    copies into dual-move instructions).
; NOTE(review): only one 128-bit load appears in the expected output and the
; rest of the result is replicated from it. Presumably this is a consequence
; of loading through an undef pointer - confirm that is intended, since
; distinct loads for each quarter are not exercised here.
define <32 x bfloat> @v32bf16_func_void() #0 {
; CI-LABEL: v32bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4
; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6
; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7
; CI-NEXT: v_mov_b32_e32 v8, v0
; CI-NEXT: v_mov_b32_e32 v9, v1
; CI-NEXT: v_mov_b32_e32 v10, v2
; CI-NEXT: v_mov_b32_e32 v11, v3
; CI-NEXT: v_mov_b32_e32 v12, v4
; CI-NEXT: v_mov_b32_e32 v13, v5
; CI-NEXT: v_mov_b32_e32 v14, v6
; CI-NEXT: v_mov_b32_e32 v16, v0
; CI-NEXT: v_mov_b32_e32 v17, v1
; CI-NEXT: v_mov_b32_e32 v18, v2
; CI-NEXT: v_mov_b32_e32 v19, v3
; CI-NEXT: v_mov_b32_e32 v20, v4
; CI-NEXT: v_mov_b32_e32 v21, v5
; CI-NEXT: v_mov_b32_e32 v24, v0
; CI-NEXT: v_mov_b32_e32 v25, v1
; CI-NEXT: v_mov_b32_e32 v26, v2
; CI-NEXT: v_mov_b32_e32 v27, v3
; CI-NEXT: v_mov_b32_e32 v28, v4
; CI-NEXT: v_mov_b32_e32 v29, v5
; CI-NEXT: v_mov_b32_e32 v22, v6
; CI-NEXT: v_mov_b32_e32 v30, v6
; CI-NEXT: v_mov_b32_e32 v15, v7
; CI-NEXT: v_mov_b32_e32 v23, v7
; CI-NEXT: v_mov_b32_e32 v31, v7
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v32bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: v_mov_b32_e32 v4, v0
; GFX89-NEXT: v_mov_b32_e32 v5, v1
; GFX89-NEXT: v_mov_b32_e32 v6, v2
; GFX89-NEXT: v_mov_b32_e32 v7, v3
; GFX89-NEXT: v_mov_b32_e32 v8, v0
; GFX89-NEXT: v_mov_b32_e32 v9, v1
; GFX89-NEXT: v_mov_b32_e32 v10, v2
; GFX89-NEXT: v_mov_b32_e32 v11, v3
; GFX89-NEXT: v_mov_b32_e32 v12, v0
; GFX89-NEXT: v_mov_b32_e32 v13, v1
; GFX89-NEXT: v_mov_b32_e32 v14, v2
; GFX89-NEXT: v_mov_b32_e32 v15, v3
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v32bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3
; GFX11-NEXT: v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1
; GFX11-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v3
; GFX11-NEXT: v_dual_mov_b32 v12, v0 :: v_dual_mov_b32 v13, v1
; GFX11-NEXT: v_dual_mov_b32 v14, v2 :: v_dual_mov_b32 v15, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = load <32 x bfloat>, ptr addrspace(1) undef
ret <32 x bfloat> %val
}
; Attribute group referenced as #0 by the function definitions in this file;
; it marks them nounwind.
attributes #0 = { nounwind }