; llvm/llvm/test/CodeGen/AMDGPU/function-returns.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,CI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX8 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

; Plain i1 return of a value loaded through an undef addrspace(1) pointer.
; Every target is expected to do one unsigned byte load into v0, wait for it,
; and return; no masking of the loaded byte is checked for.
define i1 @i1_func_void() #0 {
; GFX789-LABEL: i1_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i1, ptr addrspace(1) undef
  ret i1 %val
}

; FIXME: Is an 'and' of the loaded byte missing before the zeroext i1 return?
; zeroext i1 return of a value loaded through an undef addrspace(1) pointer.
; The expected code is just an unsigned byte load into v0 followed by the
; return; no explicit 'and' of the result appears in the checks.
define zeroext i1 @i1_zeroext_func_void() #0 {
; GFX789-LABEL: i1_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i1, ptr addrspace(1) undef
  ret i1 %val
}

; signext i1 return of a value loaded through an undef addrspace(1) pointer.
; After the unsigned byte load into v0, the checks expect
; "v_bfe_i32 v0, v0, 0, 1" (a signed 1-bit field extract at bit 0) before the
; return.
define signext i1 @i1_signext_func_void() #0 {
; GFX789-LABEL: i1_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i1, ptr addrspace(1) undef
  ret i1 %val
}

; Plain i8 return of a value loaded through an undef addrspace(1) pointer,
; expected as a single unsigned byte load (buffer_load_ubyte, or
; buffer_load_u8 on gfx1100) into v0.
define i8 @i8_func_void() #0 {
; GFX789-LABEL: i8_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i8, ptr addrspace(1) undef
  ret i8 %val
}

; zeroext i8 return of a value loaded through an undef addrspace(1) pointer.
; The checks expect only the unsigned byte load into v0, with no separate
; extension instruction before the return.
define zeroext i8 @i8_zeroext_func_void() #0 {
; GFX789-LABEL: i8_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i8, ptr addrspace(1) undef
  ret i8 %val
}

; signext i8 return of a value loaded through an undef addrspace(1) pointer.
; The checks expect the signed byte load (buffer_load_sbyte, or
; buffer_load_i8 on gfx1100) into v0 and no separate extension instruction.
define signext i8 @i8_signext_func_void() #0 {
; GFX789-LABEL: i8_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_i8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i8, ptr addrspace(1) undef
  ret i8 %val
}

; Plain i16 return of a value loaded through an undef addrspace(1) pointer,
; expected as a single unsigned 16-bit load (buffer_load_ushort, or
; buffer_load_u16 on gfx1100) into v0.
define i16 @i16_func_void() #0 {
; GFX789-LABEL: i16_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i16, ptr addrspace(1) undef
  ret i16 %val
}

; zeroext i16 return of a value loaded through an undef addrspace(1) pointer.
; The checks expect only the unsigned 16-bit load into v0, with no separate
; extension instruction before the return.
define zeroext i16 @i16_zeroext_func_void() #0 {
; GFX789-LABEL: i16_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i16, ptr addrspace(1) undef
  ret i16 %val
}

; signext i16 return of a value loaded through an undef addrspace(1) pointer.
; The checks expect the signed 16-bit load (buffer_load_sshort, or
; buffer_load_i16 on gfx1100) into v0 and no separate extension instruction.
define signext i16 @i16_signext_func_void() #0 {
; GFX789-LABEL: i16_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_i16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i16, ptr addrspace(1) undef
  ret i16 %val
}

; i32 return of a value loaded through an undef addrspace(1) pointer, expected
; as a single dword load (buffer_load_dword, or buffer_load_b32 on gfx1100)
; into v0.
define i32 @i32_func_void() #0 {
; GFX789-LABEL: i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i32, ptr addrspace(1) undef
  ret i32 %val
}

; Plain i48 return of a value loaded (align 8) through an undef addrspace(1)
; pointer. The checks expect the value split over two registers: a dword load
; into v0 and an unsigned 16-bit load into v1; gfx1100 additionally expects
; "s_clause 0x1" ahead of the two loads.
; NOTE(review): both loads print the same address operands (no offset on the
; second one) - presumably a consequence of the undef pointer; confirm.
define i48 @i48_func_void() #0 {
; GFX789-LABEL: i48_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_ushort v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i48, ptr addrspace(1) undef, align 8
  ret i48 %val
}

; zeroext i48 return of a value loaded (align 8) through an undef
; addrspace(1) pointer. The checks expect a dword load into v0 plus an
; unsigned 16-bit load into v1, with no separate extension instruction;
; gfx1100 additionally expects "s_clause 0x1" ahead of the two loads.
define zeroext i48 @i48_zeroext_func_void() #0 {
; GFX789-LABEL: i48_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_ushort v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i48, ptr addrspace(1) undef, align 8
  ret i48 %val
}

; signext i48 return of a value loaded (align 8) through an undef
; addrspace(1) pointer. The checks expect a dword load into v0 plus a signed
; 16-bit load (buffer_load_sshort / buffer_load_i16) into v1, with no separate
; extension instruction; gfx1100 additionally expects "s_clause 0x1".
define signext i48 @i48_signext_func_void() #0 {
; GFX789-LABEL: i48_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_sshort v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_i16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i48, ptr addrspace(1) undef, align 8
  ret i48 %val
}

; Plain i63 return of the incoming i63 argument (despite the "_void" in the
; name, this function takes a parameter). The checks expect only the entry
; s_waitcnt and the return, with no instruction modifying the value.
define i63 @i63_func_void(i63 %val) #0 {
; GFX789-LABEL: i63_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret i63 %val
}

; zeroext i63 return of the incoming i63 argument. The checks expect v1 to be
; and-ed with 0x7fffffff (clearing its top bit) before the return, and no
; instruction touching v0.
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
; GFX789-LABEL: i63_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret i63 %val
}

; signext i63 return of the incoming i63 argument. The checks expect a 64-bit
; shift left by 1 followed by an arithmetic shift right by 1 on v[0:1].
; The hawaii run (CI prefix) uses v_lshl_b64 / v_ashr_i64, the fiji and gfx900
; runs use the "rev" operand-order forms, and the gfx1100 run additionally
; expects an s_delay_alu between the two shifts.
define signext i63 @i63_signext_func_void(i63 %val) #0 {
; CI-LABEL: i63_signext_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT:    v_ashr_i64 v[0:1], v[0:1], 1
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: i63_signext_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT:    v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret i63 %val
}

; i64 return of a value loaded through an undef addrspace(1) pointer, expected
; as a single 64-bit load (buffer_load_dwordx2, or buffer_load_b64 on gfx1100)
; into v[0:1].
define i64 @i64_func_void() #0 {
; GFX789-LABEL: i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i64, ptr addrspace(1) undef
  ret i64 %val
}

; i65 return of a value loaded through an undef addrspace(1) pointer. The
; checks expect the value split over three registers: a 64-bit load into
; v[0:1] and an unsigned byte load into v2; gfx1100 additionally expects
; "s_clause 0x1" ahead of the two loads.
; NOTE(review): both loads print the same address operands (no offset on the
; second one) - presumably a consequence of the undef pointer; confirm.
define i65 @i65_func_void() #0 {
; GFX789-LABEL: i65_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_ubyte v2, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i65_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_u8 v2, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i65, ptr addrspace(1) undef
  ret i65 %val
}

; float return of a value loaded through an undef addrspace(1) pointer,
; expected as a single dword load into v0 - the same instruction sequence the
; checks for the i32 case use.
define float @f32_func_void() #0 {
; GFX789-LABEL: f32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: f32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load float, ptr addrspace(1) undef
  ret float %val
}

; double return of a value loaded through an undef addrspace(1) pointer,
; expected as a single 64-bit load (buffer_load_dwordx2 / buffer_load_b64)
; into v[0:1].
define double @f64_func_void() #0 {
; GFX789-LABEL: f64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: f64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load double, ptr addrspace(1) undef
  ret double %val
}

; <2 x double> return of a value loaded through an undef addrspace(1) pointer,
; expected as a single 128-bit load (buffer_load_dwordx4 / buffer_load_b128)
; into v[0:3].
define <2 x double> @v2f64_func_void() #0 {
; GFX789-LABEL: v2f64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2f64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x double>, ptr addrspace(1) undef
  ret <2 x double> %val
}

; <2 x i32> return of a value loaded through an undef addrspace(1) pointer,
; expected as a single 64-bit load (buffer_load_dwordx2 / buffer_load_b64)
; into v[0:1].
define <2 x i32> @v2i32_func_void() #0 {
; GFX789-LABEL: v2i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x i32>, ptr addrspace(1) undef
  ret <2 x i32> %val
}

; <3 x i32> return of a value loaded through an undef addrspace(1) pointer,
; expected as a single 96-bit load (buffer_load_dwordx3 / buffer_load_b96)
; into v[0:2] on every checked target, including hawaii.
define <3 x i32> @v3i32_func_void() #0 {
; GFX789-LABEL: v3i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx3 v[0:2], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <3 x i32>, ptr addrspace(1) undef
  ret <3 x i32> %val
}

; <4 x i32> return of a value loaded through an undef addrspace(1) pointer,
; expected as a single 128-bit load (buffer_load_dwordx4 / buffer_load_b128)
; into v[0:3].
define <4 x i32> @v4i32_func_void() #0 {
; GFX789-LABEL: v4i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <4 x i32>, ptr addrspace(1) undef
  ret <4 x i32> %val
}

; <5 x i32> return of a value obtained with a *volatile* load through an undef
; addrspace(1) pointer. The checks expect two loads, each carrying glc
; ("glc dlc" on gfx1100) and each followed by its own s_waitcnt vmcnt(0):
; first a dword load into v4, then a 128-bit load into v[0:3].
define <5 x i32> @v5i32_func_void() #0 {
; GFX789-LABEL: v5i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v4, off, s[4:7], 0 glc
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0 glc
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v4, off, s[0:3], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load volatile <5 x i32>, ptr addrspace(1) undef
  ret <5 x i32> %val
}

; <8 x i32> return. The data pointer is itself fetched with a volatile load
; through an undef addrspace(4) pointer, which the checks expect as a scalar
; 64-bit load (s_load_dwordx2 / s_load_b64) followed by s_waitcnt lgkmcnt(0).
; The vector is then expected as two 128-bit loads at offsets 0 and 16 into
; v[0:7]; gfx1100 additionally expects "s_clause 0x1" ahead of them.
define <8 x i32> @v8i32_func_void() #0 {
; GFX789-LABEL: v8i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i32>, ptr addrspace(1) %ptr
  ret <8 x i32> %val
}

; <16 x i32> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as four 128-bit loads at
; offsets 0, 16, 32 and 48 into v[0:15]; gfx1100 additionally expects
; "s_clause 0x3" ahead of them.
define <16 x i32> @v16i32_func_void() #0 {
; GFX789-LABEL: v16i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i32>, ptr addrspace(1) %ptr
  ret <16 x i32> %val
}

; <32 x i32> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as eight 128-bit loads at
; offsets 0 through 112 (step 16) into v[0:31]; gfx1100 additionally expects
; "s_clause 0x7" ahead of them. No stack stores appear in the checks.
define <32 x i32> @v32i32_func_void() #0 {
; GFX789-LABEL: v32i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX789-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX789-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX789-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v32i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x7
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <32 x i32>, ptr addrspace(1) %ptr
  ret <32 x i32> %val
}

; <2 x i64> return of a value loaded through an undef addrspace(1) pointer,
; expected as a single 128-bit load (buffer_load_dwordx4 / buffer_load_b128)
; into v[0:3].
define <2 x i64> @v2i64_func_void() #0 {
; GFX789-LABEL: v2i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x i64>, ptr addrspace(1) undef
  ret <2 x i64> %val
}

; <3 x i64> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as a 128-bit load at
; offset 0 into v[0:3] and a 64-bit load at offset 16 into v[4:5]; gfx1100
; additionally expects "s_clause 0x1" ahead of them.
define <3 x i64> @v3i64_func_void() #0 {
; GFX789-LABEL: v3i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx2 v[4:5], off, s[4:7], 0 offset:16
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <3 x i64>, ptr addrspace(1) %ptr
  ret <3 x i64> %val
}

; <4 x i64> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as two 128-bit loads at
; offsets 0 and 16 into v[0:7]; gfx1100 additionally expects "s_clause 0x1"
; ahead of them.
define <4 x i64> @v4i64_func_void() #0 {
; GFX789-LABEL: v4i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <4 x i64>, ptr addrspace(1) %ptr
  ret <4 x i64> %val
}

; <5 x i64> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as two 128-bit loads at
; offsets 0 and 16 into v[0:7] and a 64-bit load at offset 32 into v[8:9];
; gfx1100 additionally expects "s_clause 0x2" ahead of them.
define <5 x i64> @v5i64_func_void() #0 {
; GFX789-LABEL: v5i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx2 v[8:9], off, s[4:7], 0 offset:32
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b64 v[8:9], off, s[0:3], 0 offset:32
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <5 x i64>, ptr addrspace(1) %ptr
  ret <5 x i64> %val
}

; <8 x i64> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as four 128-bit loads at
; offsets 0, 16, 32 and 48 into v[0:15]; gfx1100 additionally expects
; "s_clause 0x3" ahead of them.
define <8 x i64> @v8i64_func_void() #0 {
; GFX789-LABEL: v8i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i64>, ptr addrspace(1) %ptr
  ret <8 x i64> %val
}

; <16 x i64> return. The data pointer is fetched with a volatile load through
; an undef addrspace(4) pointer (expected as a scalar 64-bit load plus
; s_waitcnt lgkmcnt(0)). The vector is then expected as eight 128-bit loads at
; offsets 0 through 112 (step 16) into v[0:31]; gfx1100 additionally expects
; "s_clause 0x7" ahead of them. No stack stores appear in the checks.
define <16 x i64> @v16i64_func_void() #0 {
; GFX789-LABEL: v16i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX789-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX789-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX789-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x7
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i64>, ptr addrspace(1) %ptr
  ret <16 x i64> %val
}

; Returns a <2 x i16> loaded from an undef addrspace(1) pointer. The hawaii
; run line expects the loaded dword in v0 plus a copy shifted right by 16 in
; v1; the fiji, gfx900 and gfx1100 run lines expect the function to return
; right after the single dword load into v0.
define <2 x i16> @v2i16_func_void() #0 {
; CI-LABEL: v2i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v2i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x i16>, ptr addrspace(1) undef
  ret <2 x i16> %val
}

; Returns a <3 x i16> loaded from an undef addrspace(1) pointer. The hawaii
; run line expects an 8-byte load into v[2:3] followed by a v_alignbit_b32
; (shift of the v3:v2 pair by 16) into v1 and moves of v2 -> v0 and v3 -> v2.
; The fiji, gfx900 and gfx1100 run lines expect the 8-byte load to target
; v[0:1] directly with no further instructions before the return.
define <3 x i16> @v3i16_func_void() #0 {
; CI-LABEL: v3i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_alignbit_b32 v1, v3, v2, 16
; CI-NEXT:    v_mov_b32_e32 v0, v2
; CI-NEXT:    v_mov_b32_e32 v2, v3
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v3i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <3 x i16>, ptr addrspace(1) undef
  ret <3 x i16> %val
}

; Returns a <4 x i16> loaded from an undef addrspace(1) pointer. The hawaii
; run line expects an 8-byte load into v[0:1] followed by two shifts by 16 and
; two moves that leave results in v0-v3 (v4 is used as a temporary). The fiji,
; gfx900 and gfx1100 run lines expect the function to return right after the
; 8-byte load into v[0:1].
define <4 x i16> @v4i16_func_void() #0 {
; CI-LABEL: v4i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; CI-NEXT:    v_mov_b32_e32 v2, v1
; CI-NEXT:    v_mov_b32_e32 v1, v4
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <4 x i16>, ptr addrspace(1) undef
  ret <4 x i16> %val
}

; Returns a <4 x half> loaded from an undef addrspace(1) pointer. The hawaii
; run line expects an 8-byte load into v[3:4] and then four v_cvt_f32_f16
; conversions (two of them fed by a shift right by 16) writing v0-v3. The
; fiji, gfx900 and gfx1100 run lines expect the function to return right after
; the 8-byte load into v[0:1], with no conversion instructions.
define <4 x half> @v4f16_func_void() #0 {
; CI-LABEL: v4f16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_cvt_f32_f16_e32 v0, v3
; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v4
; CI-NEXT:    v_cvt_f32_f16_e32 v2, v4
; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4f16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4f16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <4 x half>, ptr addrspace(1) undef
  ret <4 x half> %val
}

; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
;
; Returns a <5 x i16> read through a pointer that is itself obtained by a
; volatile load from an undef addrspace(4) pointer. The hawaii run line
; expects the value to be fetched in two pieces (an 8-byte load plus a
; sign-extending 16-bit load at offset 8 into v4) and then rearranged into
; v0-v3 with v_alignbit_b32, a shift by 16 and moves. The fiji, gfx900 and
; gfx1100 run lines expect a single 16-byte load into v[0:3].
; NOTE(review): every memory instruction in the checks below is a buffer_load,
; so the "Mixing buffer and global" FIXME may be stale -- confirm before
; relying on it.
define <5 x i16> @v5i16_func_void() #0 {
; CI-LABEL: v5i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT:    buffer_load_sshort v4, off, s[4:7], 0 offset:8
; CI-NEXT:    s_waitcnt vmcnt(1)
; CI-NEXT:    v_alignbit_b32 v5, v1, v0, 16
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; CI-NEXT:    v_mov_b32_e32 v2, v1
; CI-NEXT:    v_mov_b32_e32 v1, v5
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v5i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <5 x i16>, ptr addrspace(1) %ptr
  ret <5 x i16> %val
}

; Returns a <8 x i16> read through a pointer that is itself obtained by a
; volatile load from an undef addrspace(4) pointer. The hawaii run line
; expects a 16-byte load into v[8:11] that is then spread over v0-v7: odd
; registers receive the source dword shifted right by 16, even registers a
; plain copy. The fiji, gfx900 and gfx1100 run lines expect the function to
; return right after a single 16-byte load into v[0:3].
define <8 x i16> @v8i16_func_void() #0 {
; CI-LABEL: v8i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v10
; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
; CI-NEXT:    v_mov_b32_e32 v0, v8
; CI-NEXT:    v_mov_b32_e32 v2, v9
; CI-NEXT:    v_mov_b32_e32 v4, v10
; CI-NEXT:    v_mov_b32_e32 v6, v11
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v8i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i16>, ptr addrspace(1) %ptr
  ret <8 x i16> %val
}

; Returns a <16 x i16> read through a pointer that is itself obtained by a
; volatile load from an undef addrspace(4) pointer. The hawaii run line
; expects two 16-byte loads into v[22:25] and v[18:21] that are then spread
; over v0-v15 (odd registers: source dword shifted right by 16; even
; registers: plain copy), with the first group of shifts issued after
; s_waitcnt vmcnt(1) and the rest after vmcnt(0). The fiji, gfx900 and gfx1100
; run lines expect just two 16-byte loads into v[0:3] and v[4:7]; gfx1100
; additionally expects them to be preceded by s_clause 0x1.
define <16 x i16> @v16i16_func_void() #0 {
; CI-LABEL: v16i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dwordx4 v[22:25], off, s[4:7], 0
; CI-NEXT:    buffer_load_dwordx4 v[18:21], off, s[4:7], 0 offset:16
; CI-NEXT:    s_waitcnt vmcnt(1)
; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v22
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v23
; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v24
; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v25
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v18
; CI-NEXT:    v_lshrrev_b32_e32 v11, 16, v19
; CI-NEXT:    v_lshrrev_b32_e32 v13, 16, v20
; CI-NEXT:    v_lshrrev_b32_e32 v15, 16, v21
; CI-NEXT:    v_mov_b32_e32 v0, v22
; CI-NEXT:    v_mov_b32_e32 v2, v23
; CI-NEXT:    v_mov_b32_e32 v4, v24
; CI-NEXT:    v_mov_b32_e32 v6, v25
; CI-NEXT:    v_mov_b32_e32 v8, v18
; CI-NEXT:    v_mov_b32_e32 v10, v19
; CI-NEXT:    v_mov_b32_e32 v12, v20
; CI-NEXT:    v_mov_b32_e32 v14, v21
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v16i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i16>, ptr addrspace(1) %ptr
  ret <16 x i16> %val
}

; FIXME: Should pack
;
; Returns a <16 x i8> read through a pointer that is itself obtained by a
; volatile load from an undef addrspace(4) pointer. All run lines expect one
; 16-byte load into v[0:3] followed by shifts right by 8, 16 and 24 of each
; loaded dword plus register moves, leaving results in v0-v15 (v16-v18 are
; used as temporaries for the pieces of v0). The gfx1100 run line expects
; some of the moves to be paired as v_dual_mov_b32.
define <16 x i8> @v16i8_func_void() #0 {
; GFX789-LABEL: v16i8_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
; GFX789-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
; GFX789-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
; GFX789-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GFX789-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GFX789-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GFX789-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GFX789-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GFX789-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GFX789-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
; GFX789-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
; GFX789-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
; GFX789-NEXT:    v_mov_b32_e32 v4, v1
; GFX789-NEXT:    v_mov_b32_e32 v8, v2
; GFX789-NEXT:    v_mov_b32_e32 v12, v3
; GFX789-NEXT:    v_mov_b32_e32 v1, v16
; GFX789-NEXT:    v_mov_b32_e32 v2, v17
; GFX789-NEXT:    v_mov_b32_e32 v3, v18
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i8_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16
; GFX11-NEXT:    v_mov_b32_e32 v8, v2
; GFX11-NEXT:    v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
; GFX11-NEXT:    v_mov_b32_e32 v2, v17
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i8>, ptr addrspace(1) %ptr
  ret <16 x i8> %val
}

; FIXME: Should pack
;
; Returns a <4 x i8> read through a pointer that is itself obtained by a
; volatile load from an undef addrspace(4) pointer. All run lines expect one
; dword load into v0 followed by shifts right by 8, 16 and 24 into v1, v2 and
; v3. The -NEXT checks pin the instruction order, which differs between the
; two check groups: the hawaii/fiji/gfx900 group lists the shift by 16 first,
; while the gfx1100 group lists the shifts in ascending order.
define <4  x i8> @v4i8_func_void() #0 {
; GFX789-LABEL: v4i8_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX789-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX789-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i8_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <4  x i8>, ptr addrspace(1) %ptr
  ret <4  x i8> %val
}

; Returns a { i8, i32 } aggregate loaded from an undef addrspace(1) pointer.
; All run lines expect two separate loads: an unsigned byte load into v0 and
; a dword load into v1. Neither load carries an offset: modifier in the
; checks (presumably because the base pointer is undef -- confirm). The
; gfx1100 run line additionally expects the pair to be preceded by
; s_clause 0x1.
define {i8, i32} @struct_i8_i32_func_void() #0 {
; GFX789-LABEL: struct_i8_i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dword v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_i8_i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load { i8, i32 }, ptr addrspace(1) undef
  ret { i8, i32 } %val
}

; Explicit-sret variant: the caller passes an addrspace(5) pointer marked
; sret({ i8, i32 }) and the function stores an i8 to field 0 and an i32 to
; field 1 of it, both values coming from volatile loads of an undef
; addrspace(1) pointer. The checks expect:
;  - each volatile load to carry glc (plus dlc on gfx1100) and to be followed
;    by its own s_waitcnt vmcnt(0);
;  - both stores to use v0 as the address register, the second with offset:4;
;  - hawaii/fiji/gfx900 to use buffer_store_* with offen and a final
;    s_waitcnt vmcnt(0), while gfx1100 uses scratch_store_* under s_clause 0x1
;    with no wait before the return.
define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 {
; GFX789-LABEL: void_func_sret_struct_i8_i32:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v1, off, s[4:7], 0 glc
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    buffer_load_dword v2, off, s[4:7], 0 glc
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    buffer_store_byte v1, v0, s[0:3], 0 offen
; GFX789-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_sret_struct_i8_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v1, off, s[0:3], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v2, off, s[0:3], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_store_b8 v0, v1, off
; GFX11-NEXT:    scratch_store_b32 v0, v2, off offset:4
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load volatile i8, ptr addrspace(1) undef
  %val1 = load volatile i32, ptr addrspace(1) undef
  %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
  store i8 %val0, ptr addrspace(5) %gep0
  store i32 %val1, ptr addrspace(5) %gep1
  ret void
}

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.
;
; Returns a <33 x i32> read through a pointer that is itself obtained by a
; volatile load from an undef addrspace(4) pointer. In every check group the
; 132 bytes are loaded into v1-v33 (eight 16-byte loads plus one dword load
; at offset 128) and then written back out with stores addressed through v0,
; i.e. the result is expected to be returned through memory rather than in
; registers. The store sequence differs per run line:
;  - hawaii and fiji: one buffer_store_dword per element, each with its
;    address materialised by a separate v_add into a VGPR (this is the
;    unfolded-offset pattern the FIXME above refers to);
;  - gfx900: one buffer_store_dword per element using v0 plus an immediate
;    offset: field, with no address adds;
;  - gfx1100: eight scratch_store_b128 plus one scratch_store_b32, each
;    preceded by its own s_waitcnt vmcnt(N).
define <33 x i32> @v33i32_func_void() #0 {
; CI-LABEL: v33i32_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    v_add_i32_e32 v34, vcc, 0x80, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
; CI-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; CI-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; CI-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; CI-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; CI-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; CI-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; CI-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; CI-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(8)
; CI-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v33, vcc, 0x7c, v0
; CI-NEXT:    s_waitcnt vmcnt(8)
; CI-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x78, v0
; CI-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x74, v0
; CI-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x70, v0
; CI-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x6c, v0
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x68, v0
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x64, v0
; CI-NEXT:    s_waitcnt vmcnt(11)
; CI-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x60, v0
; CI-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x5c, v0
; CI-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x58, v0
; CI-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x54, v0
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x50, v0
; CI-NEXT:    v_add_i32_e32 v5, vcc, 0x4c, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 56, v0
; CI-NEXT:    v_add_i32_e32 v6, vcc, 0x48, v0
; CI-NEXT:    v_add_i32_e32 v7, vcc, 0x44, v0
; CI-NEXT:    v_add_i32_e32 v2, vcc, 64, v0
; CI-NEXT:    v_add_i32_e32 v3, vcc, 60, v0
; CI-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v4, vcc, 52, v0
; CI-NEXT:    v_add_i32_e32 v8, vcc, 48, v0
; CI-NEXT:    v_add_i32_e32 v9, vcc, 44, v0
; CI-NEXT:    v_add_i32_e32 v10, vcc, 40, v0
; CI-NEXT:    v_add_i32_e32 v11, vcc, 36, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 32, v0
; CI-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 28, v0
; CI-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 24, v0
; CI-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 20, v0
; CI-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 16, v0
; CI-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 12, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
; CI-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; CI-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v33i32_func_void:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT:    s_mov_b32 s7, 0xf000
; GFX8-NEXT:    s_mov_b32 s6, -1
; GFX8-NEXT:    v_add_u32_e32 v34, vcc, 0x80, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX8-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX8-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX8-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX8-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX8-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX8-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX8-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX8-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX8-NEXT:    s_waitcnt vmcnt(8)
; GFX8-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v33, vcc, 0x7c, v0
; GFX8-NEXT:    s_waitcnt vmcnt(8)
; GFX8-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x78, v0
; GFX8-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x74, v0
; GFX8-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x70, v0
; GFX8-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x6c, v0
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x68, v0
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x64, v0
; GFX8-NEXT:    s_waitcnt vmcnt(11)
; GFX8-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x60, v0
; GFX8-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x5c, v0
; GFX8-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x58, v0
; GFX8-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x54, v0
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x50, v0
; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x4c, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 56, v0
; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x48, v0
; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x44, v0
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 64, v0
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 60, v0
; GFX8-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 52, v0
; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 48, v0
; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 44, v0
; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 40, v0
; GFX8-NEXT:    v_add_u32_e32 v11, vcc, 36, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 32, v0
; GFX8-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 28, v0
; GFX8-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 20, v0
; GFX8-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 12, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 8, v0
; GFX8-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 4, v0
; GFX8-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v33i32_func_void:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX9-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX9-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX9-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX9-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX9-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX9-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX9-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX9-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(8)
; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT:    s_waitcnt vmcnt(11)
; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT:    s_waitcnt vmcnt(14)
; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT:    s_waitcnt vmcnt(17)
; GFX9-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(17)
; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT:    s_waitcnt vmcnt(20)
; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT:    s_waitcnt vmcnt(23)
; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT:    s_waitcnt vmcnt(26)
; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(29)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v33i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x8
; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112
; GFX11-NEXT:    buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96
; GFX11-NEXT:    buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
; GFX11-NEXT:    buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
; GFX11-NEXT:    buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
; GFX11-NEXT:    buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[29:32], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b32 v33, off, s[0:3], 0 offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(8)
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT:    s_waitcnt vmcnt(7)
; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:96
; GFX11-NEXT:    s_waitcnt vmcnt(6)
; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:80
; GFX11-NEXT:    s_waitcnt vmcnt(5)
; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:64
; GFX11-NEXT:    s_waitcnt vmcnt(4)
; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:48
; GFX11-NEXT:    s_waitcnt vmcnt(3)
; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:32
; GFX11-NEXT:    s_waitcnt vmcnt(2)
; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b32 v0, v33, off offset:128
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <33 x i32>, ptr addrspace(1) %ptr
  ret <33 x i32> %val
}

; Return a { <32 x i32>, i32 } aggregate (33 dwords) that was loaded from
; global memory. The source pointer itself comes from a volatile load in the
; constant address space.
;
; The expected output below never leaves the value in v1..v33; instead every
; dword is stored through the address held in v0 (offen buffer stores on
; hawaii/fiji/gfx900, scratch stores on gfx1100), i.e. the result appears to be
; returned indirectly through memory. The <32 x i32> member occupies byte
; offsets 0-124 and the trailing i32 sits at byte offset 128.
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
; CI-LABEL: struct_v32i32_i32_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    v_add_i32_e32 v34, vcc, 0x80, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
; CI-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; CI-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; CI-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; CI-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; CI-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; CI-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; CI-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; CI-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(8)
; CI-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v33, vcc, 0x7c, v0
; CI-NEXT:    s_waitcnt vmcnt(8)
; CI-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x78, v0
; CI-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x74, v0
; CI-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x70, v0
; CI-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x6c, v0
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x68, v0
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x64, v0
; CI-NEXT:    s_waitcnt vmcnt(11)
; CI-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x60, v0
; CI-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x5c, v0
; CI-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x58, v0
; CI-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x54, v0
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x50, v0
; CI-NEXT:    v_add_i32_e32 v5, vcc, 0x4c, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 56, v0
; CI-NEXT:    v_add_i32_e32 v6, vcc, 0x48, v0
; CI-NEXT:    v_add_i32_e32 v7, vcc, 0x44, v0
; CI-NEXT:    v_add_i32_e32 v2, vcc, 64, v0
; CI-NEXT:    v_add_i32_e32 v3, vcc, 60, v0
; CI-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v4, vcc, 52, v0
; CI-NEXT:    v_add_i32_e32 v8, vcc, 48, v0
; CI-NEXT:    v_add_i32_e32 v9, vcc, 44, v0
; CI-NEXT:    v_add_i32_e32 v10, vcc, 40, v0
; CI-NEXT:    v_add_i32_e32 v11, vcc, 36, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 32, v0
; CI-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 28, v0
; CI-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 24, v0
; CI-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 20, v0
; CI-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 16, v0
; CI-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 12, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
; CI-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
; CI-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: struct_v32i32_i32_func_void:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT:    s_mov_b32 s7, 0xf000
; GFX8-NEXT:    s_mov_b32 s6, -1
; GFX8-NEXT:    v_add_u32_e32 v34, vcc, 0x80, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX8-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX8-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX8-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX8-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX8-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX8-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX8-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX8-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX8-NEXT:    s_waitcnt vmcnt(8)
; GFX8-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v33, vcc, 0x7c, v0
; GFX8-NEXT:    s_waitcnt vmcnt(8)
; GFX8-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x78, v0
; GFX8-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x74, v0
; GFX8-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x70, v0
; GFX8-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x6c, v0
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x68, v0
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x64, v0
; GFX8-NEXT:    s_waitcnt vmcnt(11)
; GFX8-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x60, v0
; GFX8-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x5c, v0
; GFX8-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x58, v0
; GFX8-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x54, v0
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x50, v0
; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x4c, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 56, v0
; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x48, v0
; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x44, v0
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 64, v0
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 60, v0
; GFX8-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 52, v0
; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 48, v0
; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 44, v0
; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 40, v0
; GFX8-NEXT:    v_add_u32_e32 v11, vcc, 36, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 32, v0
; GFX8-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 28, v0
; GFX8-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 24, v0
; GFX8-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 20, v0
; GFX8-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v0
; GFX8-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 12, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 8, v0
; GFX8-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 4, v0
; GFX8-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: struct_v32i32_i32_func_void:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
; GFX9-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
; GFX9-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
; GFX9-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
; GFX9-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
; GFX9-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
; GFX9-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
; GFX9-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
; GFX9-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(8)
; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:124
; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:120
; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:116
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
; GFX9-NEXT:    s_waitcnt vmcnt(11)
; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:108
; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:104
; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:100
; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:96
; GFX9-NEXT:    s_waitcnt vmcnt(14)
; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:92
; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:88
; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:84
; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:80
; GFX9-NEXT:    s_waitcnt vmcnt(17)
; GFX9-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(17)
; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:76
; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:72
; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:68
; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:64
; GFX9-NEXT:    s_waitcnt vmcnt(20)
; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:60
; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:56
; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:52
; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:48
; GFX9-NEXT:    s_waitcnt vmcnt(23)
; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:44
; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:40
; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:36
; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:32
; GFX9-NEXT:    s_waitcnt vmcnt(26)
; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:28
; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:24
; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:20
; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:16
; GFX9-NEXT:    s_waitcnt vmcnt(29)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:12
; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:8
; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:4
; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_v32i32_i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x8
; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112
; GFX11-NEXT:    buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96
; GFX11-NEXT:    buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
; GFX11-NEXT:    buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
; GFX11-NEXT:    buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
; GFX11-NEXT:    buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[29:32], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b32 v33, off, s[0:3], 0 offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(8)
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT:    s_waitcnt vmcnt(7)
; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:96
; GFX11-NEXT:    s_waitcnt vmcnt(6)
; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:80
; GFX11-NEXT:    s_waitcnt vmcnt(5)
; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:64
; GFX11-NEXT:    s_waitcnt vmcnt(4)
; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:48
; GFX11-NEXT:    s_waitcnt vmcnt(3)
; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:32
; GFX11-NEXT:    s_waitcnt vmcnt(2)
; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b32 v0, v33, off offset:128
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr
  ret { <32 x i32>, i32 }%val
}

; Return a { i32, <32 x i32> } aggregate loaded from global memory; this is the
; mirror image of the { <32 x i32>, i32 } case, with the scalar placed first.
; The source pointer itself comes from a volatile load in the constant address
; space.
;
; As the expected output shows, the value is written through the address held
; in v0 rather than left in registers. The leading i32 is read from and stored
; to byte offset 0, while the <32 x i32> member occupies byte offsets 128-252;
; bytes 4-127 are never touched (presumably padding from the vector member's
; alignment).
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
; CI-LABEL: struct_i32_v32i32_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    buffer_load_dword v33, off, s[4:7], 0
; CI-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
; CI-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
; CI-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
; CI-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
; CI-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
; CI-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
; CI-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
; CI-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
; CI-NEXT:    s_waitcnt vmcnt(8)
; CI-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v33, vcc, 0xfc, v0
; CI-NEXT:    s_waitcnt vmcnt(8)
; CI-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0xf8, v0
; CI-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xf4, v0
; CI-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xf0, v0
; CI-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xec, v0
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xe8, v0
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xe4, v0
; CI-NEXT:    s_waitcnt vmcnt(11)
; CI-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xe0, v0
; CI-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xdc, v0
; CI-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xd8, v0
; CI-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xd4, v0
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0xd0, v0
; CI-NEXT:    v_add_i32_e32 v5, vcc, 0xcc, v0
; CI-NEXT:    v_add_i32_e32 v6, vcc, 0xc8, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xb8, v0
; CI-NEXT:    v_add_i32_e32 v7, vcc, 0xc4, v0
; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xc0, v0
; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xbc, v0
; CI-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v4, vcc, 0xb4, v0
; CI-NEXT:    v_add_i32_e32 v8, vcc, 0xb0, v0
; CI-NEXT:    v_add_i32_e32 v9, vcc, 0xac, v0
; CI-NEXT:    v_add_i32_e32 v10, vcc, 0xa8, v0
; CI-NEXT:    v_add_i32_e32 v11, vcc, 0xa4, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v5, vcc, 0xa0, v0
; CI-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v21, v5, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x9c, v0
; CI-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x98, v0
; CI-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x94, v0
; CI-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x90, v0
; CI-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x8c, v0
; CI-NEXT:    s_waitcnt vmcnt(14)
; CI-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x88, v0
; CI-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x84, v0
; CI-NEXT:    v_add_i32_e32 v0, vcc, 0x80, v0
; CI-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
; CI-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: struct_i32_v32i32_func_void:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX8-NEXT:    s_mov_b32 s7, 0xf000
; GFX8-NEXT:    s_mov_b32 s6, -1
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v33, off, s[4:7], 0
; GFX8-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
; GFX8-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
; GFX8-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
; GFX8-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
; GFX8-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
; GFX8-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
; GFX8-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
; GFX8-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
; GFX8-NEXT:    s_waitcnt vmcnt(8)
; GFX8-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v33, vcc, 0xfc, v0
; GFX8-NEXT:    s_waitcnt vmcnt(8)
; GFX8-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xf8, v0
; GFX8-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xf4, v0
; GFX8-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xf0, v0
; GFX8-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xec, v0
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xe8, v0
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xe4, v0
; GFX8-NEXT:    s_waitcnt vmcnt(11)
; GFX8-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xe0, v0
; GFX8-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xdc, v0
; GFX8-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xd8, v0
; GFX8-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xd4, v0
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xd0, v0
; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0xcc, v0
; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0xc8, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xb8, v0
; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0xc4, v0
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xc0, v0
; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xbc, v0
; GFX8-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xb4, v0
; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 0xb0, v0
; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 0xac, v0
; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 0xa8, v0
; GFX8-NEXT:    v_add_u32_e32 v11, vcc, 0xa4, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0xa0, v0
; GFX8-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v21, v5, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x9c, v0
; GFX8-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x98, v0
; GFX8-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x94, v0
; GFX8-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x90, v0
; GFX8-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x8c, v0
; GFX8-NEXT:    s_waitcnt vmcnt(14)
; GFX8-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x88, v0
; GFX8-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x84, v0
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x80, v0
; GFX8-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
; GFX8-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: struct_i32_v32i32_func_void:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
; GFX9-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
; GFX9-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
; GFX9-NEXT:    buffer_load_dword v33, off, s[4:7], 0
; GFX9-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
; GFX9-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
; GFX9-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
; GFX9-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
; GFX9-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(8)
; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:252
; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:244
; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
; GFX9-NEXT:    s_waitcnt vmcnt(11)
; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:236
; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:232
; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:228
; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:224
; GFX9-NEXT:    s_waitcnt vmcnt(14)
; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:220
; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:216
; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:212
; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:208
; GFX9-NEXT:    s_waitcnt vmcnt(17)
; GFX9-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt vmcnt(17)
; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:204
; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:200
; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:196
; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:192
; GFX9-NEXT:    s_waitcnt vmcnt(20)
; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:188
; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:184
; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:180
; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:176
; GFX9-NEXT:    s_waitcnt vmcnt(23)
; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:172
; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:168
; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:164
; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:160
; GFX9-NEXT:    s_waitcnt vmcnt(26)
; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:156
; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:152
; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:148
; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:144
; GFX9-NEXT:    s_waitcnt vmcnt(29)
; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:140
; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:136
; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:132
; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_i32_v32i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x8
; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0 offset:240
; GFX11-NEXT:    buffer_load_b128 v[5:8], off, s[0:3], 0 offset:224
; GFX11-NEXT:    buffer_load_b128 v[9:12], off, s[0:3], 0 offset:208
; GFX11-NEXT:    buffer_load_b128 v[13:16], off, s[0:3], 0 offset:192
; GFX11-NEXT:    buffer_load_b128 v[17:20], off, s[0:3], 0 offset:176
; GFX11-NEXT:    buffer_load_b128 v[21:24], off, s[0:3], 0 offset:160
; GFX11-NEXT:    buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144
; GFX11-NEXT:    buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128
; GFX11-NEXT:    buffer_load_b32 v33, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(8)
; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:240
; GFX11-NEXT:    s_waitcnt vmcnt(7)
; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:224
; GFX11-NEXT:    s_waitcnt vmcnt(6)
; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:208
; GFX11-NEXT:    s_waitcnt vmcnt(5)
; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:192
; GFX11-NEXT:    s_waitcnt vmcnt(4)
; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:176
; GFX11-NEXT:    s_waitcnt vmcnt(3)
; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:160
; GFX11-NEXT:    s_waitcnt vmcnt(2)
; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:144
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off offset:128
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    scratch_store_b32 v0, v33, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr
  ret { i32, <32 x i32> }%val
}

; Make sure the last struct component is returned in v3, not v4: the <3 x i32>
; member should take only v0-v2, so no register is wasted padding it out to
; four lanes before the trailing i32.
;
; The four values are produced by separate volatile loads from the local
; address space (addrspace(3)); the expected output has one ds load per value,
; writing v0, v1, v2 and v3 in order.
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
; CI-LABEL: v3i32_struct_func_void_wasted_reg:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_read_b32 v0, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    ds_read_b32 v1, v0
; CI-NEXT:    ds_read_b32 v2, v0
; CI-NEXT:    ds_read_b32 v3, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_read_b32 v0, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ds_read_b32 v1, v0
; GFX8-NEXT:    ds_read_b32 v2, v0
; GFX8-NEXT:    ds_read_b32 v3, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b32 v0, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ds_read_b32 v1, v0
; GFX9-NEXT:    ds_read_b32 v2, v0
; GFX9-NEXT:    ds_read_b32 v3, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    ds_load_b32 v0, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    ds_load_b32 v1, v0
; GFX11-NEXT:    ds_load_b32 v2, v0
; GFX11-NEXT:    ds_load_b32 v3, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load0 = load volatile i32, ptr addrspace(3) undef
  %load1 = load volatile i32, ptr addrspace(3) undef
  %load2 = load volatile i32, ptr addrspace(3) undef
  %load3 = load volatile i32, ptr addrspace(3) undef

  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x i32>, i32 } %insert.4
}

; Returns a { <3 x float>, i32 } struct assembled from four volatile loads
; through an undef addrspace(3) pointer. The expected output for every run
; line reads the four values into v0, v1, v2 and v3 in that order, so the
; trailing i32 member is expected in v3, directly after the three vector
; elements. The hawaii and fiji checks additionally expect m0 to be set to -1
; before the first DS read; the gfx900 and gfx1100 checks do not.
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
; CI-LABEL: v3f32_struct_func_void_wasted_reg:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_read_b32 v0, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    ds_read_b32 v1, v0
; CI-NEXT:    ds_read_b32 v2, v0
; CI-NEXT:    ds_read_b32 v3, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_read_b32 v0, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    ds_read_b32 v1, v0
; GFX8-NEXT:    ds_read_b32 v2, v0
; GFX8-NEXT:    ds_read_b32 v3, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_read_b32 v0, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    ds_read_b32 v1, v0
; GFX9-NEXT:    ds_read_b32 v2, v0
; GFX9-NEXT:    ds_read_b32 v3, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    ds_load_b32 v0, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    ds_load_b32 v1, v0
; GFX11-NEXT:    ds_load_b32 v2, v0
; GFX11-NEXT:    ds_load_b32 v3, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %load0 = load volatile float, ptr addrspace(3) undef
  %load1 = load volatile float, ptr addrspace(3) undef
  %load2 = load volatile float, ptr addrspace(3) undef
  %load3 = load volatile i32, ptr addrspace(3) undef

  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x float>, i32 } %insert.4
}

; Converts the addrspace(5) sret pointer to an i32 and volatile-stores that
; value shifted right by 16, 17 and 18 bits. The checks pin which of the
; shifted values fold to constant zero: the hawaii, fiji and gfx900 output
; keeps only the shift by 16 and stores 0 for the other two, while the
; gfx1100 output keeps the shifts by 16 and 17 and stores 0 only for the
; shift by 18.
; NOTE(review): presumably this reflects how many high bits of a private
; (scratch) address the backend treats as known zero on each target - confirm.
define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 {
; CI-LABEL: void_func_sret_max_known_zero_bits:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT:    s_mov_b32 m0, -1
; CI-NEXT:    ds_write_b32 v0, v0
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    ds_write_b32 v0, v0
; CI-NEXT:    ds_write_b32 v0, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: void_func_sret_max_known_zero_bits:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_write_b32 v0, v0
; GFX8-NEXT:    v_mov_b32_e32 v0, 0
; GFX8-NEXT:    ds_write_b32 v0, v0
; GFX8-NEXT:    ds_write_b32 v0, v0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_sret_max_known_zero_bits:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    ds_write_b32 v0, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    ds_write_b32 v0, v0
; GFX9-NEXT:    ds_write_b32 v0, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_sret_max_known_zero_bits:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 17, v0
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    ds_store_b32 v0, v1
; GFX11-NEXT:    ds_store_b32 v0, v0
; GFX11-NEXT:    ds_store_b32 v0, v2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32

  %lshr0 = lshr i32 %arg0.int, 16
  %lshr1 = lshr i32 %arg0.int, 17
  %lshr2 = lshr i32 %arg0.int, 18

  store volatile i32 %lshr0, ptr addrspace(3) undef
  store volatile i32 %lshr1, ptr addrspace(3) undef
  store volatile i32 %lshr2, ptr addrspace(3) undef
  ret void
}

; Loads one bfloat through an undef addrspace(1) pointer and returns it.
; The hawaii checks expect the 16-bit load result to be shifted left by 16 in
; v0 before returning; the fiji, gfx900 and gfx1100 checks expect the 16-bit
; load result to be returned in v0 without further instructions.
define bfloat @bf16_func_void() #0 {
; CI-LABEL: bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load bfloat, ptr addrspace(1) undef
  ret bfloat %val
}

; Loads a <2 x bfloat> through an undef addrspace(1) pointer and returns it.
; The hawaii checks expect one dword load that is then split across two
; VGPRs: v0 receives the low half shifted left by 16 and v1 receives the
; dword masked with 0xffff0000. The fiji, gfx900 and gfx1100 checks expect
; the loaded dword to be returned unchanged in v0.
define <2 x bfloat> @v2bf16_func_void() #0 {
; CI-LABEL: v2bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dword v1, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v2bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x bfloat>, ptr addrspace(1) undef
  ret <2 x bfloat> %val
}

; Loads a <3 x bfloat> through an undef addrspace(1) pointer and returns it.
; Every run line expects a two-dword load. The hawaii checks then expand the
; data into v0, v1 and v2 (shift left by 16 for the low half of a dword, mask
; with 0xffff0000 for the high half); the fiji, gfx900 and gfx1100 checks
; expect the two loaded dwords to be returned unchanged in v[0:1].
define <3 x bfloat> @v3bf16_func_void() #0 {
; CI-LABEL: v3bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v3bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <3 x bfloat>, ptr addrspace(1) undef
  ret <3 x bfloat> %val
}

; Loads a <4 x bfloat> through an undef addrspace(1) pointer and returns it.
; Every run line expects a two-dword load. The hawaii checks then expand the
; data into v0 through v3 (shift left by 16 for the low half of each dword,
; mask with 0xffff0000 for the high half); the fiji, gfx900 and gfx1100
; checks expect the two loaded dwords to be returned unchanged in v[0:1].
define <4 x bfloat> @v4bf16_func_void() #0 {
; CI-LABEL: v4bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <4 x bfloat>, ptr addrspace(1) undef
  ret <4 x bfloat> %val
}

; Loads a <6 x bfloat> through an undef addrspace(1) pointer and returns it.
; Every run line expects a three-dword load. The hawaii checks then expand
; the data into v0 through v5 (shift left by 16 for the low half of each
; dword, mask with 0xffff0000 for the high half); the fiji, gfx900 and
; gfx1100 checks expect the three loaded dwords to be returned unchanged in
; v[0:2].
define <6 x bfloat> @v6bf16_func_void() #0 {
; CI-LABEL: v6bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx3 v[3:5], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v3
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v4
; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v5
; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v5
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v6bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx3 v[0:2], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v6bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <6 x bfloat>, ptr addrspace(1) undef
  ret <6 x bfloat> %val
}

; Loads an <8 x bfloat> through an undef addrspace(1) pointer and returns it.
; Every run line expects a four-dword load. The hawaii checks then expand
; the data into v0 through v7 (shift left by 16 for the low half of each
; dword, mask with 0xffff0000 for the high half); the fiji, gfx900 and
; gfx1100 checks expect the four loaded dwords to be returned unchanged in
; v[0:3].
define <8 x bfloat> @v8bf16_func_void() #0 {
; CI-LABEL: v8bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; CI-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v8bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <8 x bfloat>, ptr addrspace(1) undef
  ret <8 x bfloat> %val
}

; Loads a <16 x bfloat> through an undef addrspace(1) pointer and returns it.
; The expected output for every run line contains only a single four-dword
; load; the remaining return registers are filled with register copies of
; that data. The hawaii checks expand the load into v0 through v7 and copy
; those into v8 through v15; the fiji and gfx900 checks copy v[0:3] into v4
; through v7, and the gfx1100 checks do the same with v_dual_mov_b32.
; NOTE(review): the single load is presumably a consequence of the undef
; source pointer - confirm before relying on it.
define <16 x bfloat> @v16bf16_func_void() #0 {
; CI-LABEL: v16bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; CI-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; CI-NEXT:    v_mov_b32_e32 v8, v0
; CI-NEXT:    v_mov_b32_e32 v9, v1
; CI-NEXT:    v_mov_b32_e32 v10, v2
; CI-NEXT:    v_mov_b32_e32 v11, v3
; CI-NEXT:    v_mov_b32_e32 v12, v4
; CI-NEXT:    v_mov_b32_e32 v13, v5
; CI-NEXT:    v_mov_b32_e32 v14, v6
; CI-NEXT:    v_mov_b32_e32 v15, v7
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v16bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    v_mov_b32_e32 v4, v0
; GFX89-NEXT:    v_mov_b32_e32 v5, v1
; GFX89-NEXT:    v_mov_b32_e32 v6, v2
; GFX89-NEXT:    v_mov_b32_e32 v7, v3
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT:    v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <16 x bfloat>, ptr addrspace(1) undef
  ret <16 x bfloat> %val
}

; Loads a <32 x bfloat> through an undef addrspace(1) pointer and returns it.
; The expected output for every run line contains only a single four-dword
; load; the remaining return registers are filled with register copies of
; that data. The hawaii checks expand the load into v0 through v7 and copy
; those into v8 through v31; the fiji and gfx900 checks copy v[0:3] into v4
; through v15, and the gfx1100 checks do the same with v_dual_mov_b32.
; NOTE(review): the single load is presumably a consequence of the undef
; source pointer - confirm before relying on it.
define <32 x bfloat> @v32bf16_func_void() #0 {
; CI-LABEL: v32bf16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; CI-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; CI-NEXT:    v_mov_b32_e32 v8, v0
; CI-NEXT:    v_mov_b32_e32 v9, v1
; CI-NEXT:    v_mov_b32_e32 v10, v2
; CI-NEXT:    v_mov_b32_e32 v11, v3
; CI-NEXT:    v_mov_b32_e32 v12, v4
; CI-NEXT:    v_mov_b32_e32 v13, v5
; CI-NEXT:    v_mov_b32_e32 v14, v6
; CI-NEXT:    v_mov_b32_e32 v16, v0
; CI-NEXT:    v_mov_b32_e32 v17, v1
; CI-NEXT:    v_mov_b32_e32 v18, v2
; CI-NEXT:    v_mov_b32_e32 v19, v3
; CI-NEXT:    v_mov_b32_e32 v20, v4
; CI-NEXT:    v_mov_b32_e32 v21, v5
; CI-NEXT:    v_mov_b32_e32 v24, v0
; CI-NEXT:    v_mov_b32_e32 v25, v1
; CI-NEXT:    v_mov_b32_e32 v26, v2
; CI-NEXT:    v_mov_b32_e32 v27, v3
; CI-NEXT:    v_mov_b32_e32 v28, v4
; CI-NEXT:    v_mov_b32_e32 v29, v5
; CI-NEXT:    v_mov_b32_e32 v22, v6
; CI-NEXT:    v_mov_b32_e32 v30, v6
; CI-NEXT:    v_mov_b32_e32 v15, v7
; CI-NEXT:    v_mov_b32_e32 v23, v7
; CI-NEXT:    v_mov_b32_e32 v31, v7
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v32bf16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    v_mov_b32_e32 v4, v0
; GFX89-NEXT:    v_mov_b32_e32 v5, v1
; GFX89-NEXT:    v_mov_b32_e32 v6, v2
; GFX89-NEXT:    v_mov_b32_e32 v7, v3
; GFX89-NEXT:    v_mov_b32_e32 v8, v0
; GFX89-NEXT:    v_mov_b32_e32 v9, v1
; GFX89-NEXT:    v_mov_b32_e32 v10, v2
; GFX89-NEXT:    v_mov_b32_e32 v11, v3
; GFX89-NEXT:    v_mov_b32_e32 v12, v0
; GFX89-NEXT:    v_mov_b32_e32 v13, v1
; GFX89-NEXT:    v_mov_b32_e32 v14, v2
; GFX89-NEXT:    v_mov_b32_e32 v15, v3
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v32bf16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
; GFX11-NEXT:    v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3
; GFX11-NEXT:    v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1
; GFX11-NEXT:    v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v3
; GFX11-NEXT:    v_dual_mov_b32 v12, v0 :: v_dual_mov_b32 v13, v1
; GFX11-NEXT:    v_dual_mov_b32 v14, v2 :: v_dual_mov_b32 v15, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <32 x bfloat>, ptr addrspace(1) undef
  ret <32 x bfloat> %val
}

; Attribute group referenced as #0 by the function definitions above.
attributes #0 = { nounwind }