
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s

declare hidden void @external_void_func_void() #0

declare hidden void @external_void_func_empty_struct({}, i32) #0
declare hidden void @external_void_func_empty_array([0 x i8], i32) #0

declare hidden void @external_void_func_i1(i1) #0
declare hidden void @external_void_func_i1_signext(i1 signext) #0
declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0

declare hidden void @external_void_func_i8(i8) #0
declare hidden void @external_void_func_i8_signext(i8 signext) #0
declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0

declare hidden void @external_void_func_i16(i16) #0
declare hidden void @external_void_func_i16_signext(i16 signext) #0
declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0

declare hidden void @external_void_func_i32(i32) #0
declare hidden void @external_void_func_i64(i64) #0
declare hidden void @external_void_func_v2i64(<2 x i64>) #0
declare hidden void @external_void_func_v3i64(<3 x i64>) #0
declare hidden void @external_void_func_v4i64(<4 x i64>) #0


declare hidden void @external_void_func_i48(i48) #0
declare hidden void @external_void_func_i48_signext(i48 signext) #0
declare hidden void @external_void_func_i48_zeroext(i48 zeroext) #0

declare hidden void @external_void_func_p0(ptr) #0
declare hidden void @external_void_func_v2p0(<2 x ptr>) #0

declare hidden void @external_void_func_f16(half) #0
declare hidden void @external_void_func_f32(float) #0
declare hidden void @external_void_func_f64(double) #0
declare hidden void @external_void_func_v2f32(<2 x float>) #0
declare hidden void @external_void_func_v2f64(<2 x double>) #0
declare hidden void @external_void_func_v3f32(<3 x float>) #0
declare hidden void @external_void_func_v3f64(<3 x double>) #0
declare hidden void @external_void_func_v5f32(<5 x float>) #0

declare hidden void @external_void_func_v2i16(<2 x i16>) #0
declare hidden void @external_void_func_v2f16(<2 x half>) #0
declare hidden void @external_void_func_v3i16(<3 x i16>) #0
declare hidden void @external_void_func_v3f16(<3 x half>) #0
declare hidden void @external_void_func_v4i16(<4 x i16>) #0
declare hidden void @external_void_func_v4f16(<4 x half>) #0
declare hidden void @external_void_func_v5i16(<5 x i16>) #0
declare hidden void @external_void_func_v7i16(<7 x i16>) #0
declare hidden void @external_void_func_v63i16(<63 x i16>) #0
declare hidden void @external_void_func_v65i16(<65 x i16>) #0
declare hidden void @external_void_func_v66i16(<66 x i16>) #0

declare hidden void @external_void_func_v2i32(<2 x i32>) #0
declare hidden void @external_void_func_v3i32(<3 x i32>) #0
declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
declare hidden void @external_void_func_v4i32(<4 x i32>) #0
declare hidden void @external_void_func_v5i32(<5 x i32>) #0
declare hidden void @external_void_func_v8i32(<8 x i32>) #0
declare hidden void @external_void_func_v16i32(<16 x i32>) #0
declare hidden void @external_void_func_v32i32(<32 x i32>) #0
declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, ptr addrspace(3), ptr addrspace(5)) #0
declare hidden void @external_void_func_v32i32_i8_i8_i16(<32 x i32>, i8, i8, i16) #0

; Structs
declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0

declare hidden void @external_void_func_v2i8(<2 x i8>) #0
declare hidden void @external_void_func_v3i8(<3 x i8>) #0
declare hidden void @external_void_func_v4i8(<4 x i8>) #0
declare hidden void @external_void_func_v8i8(<8 x i8>) #0
declare hidden void @external_void_func_v16i8(<16 x i8>) #0

declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
    <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
    <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
    <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
    <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0

declare hidden void @external_void_func_i16_inreg(i16 inreg) #0
declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
declare hidden void @external_void_func_i64_inreg(i64 inreg) #0
declare hidden void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
declare hidden void @external_void_func_f16_inreg(half inreg) #0
declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
declare hidden void @external_void_func_f32_inreg(float inreg) #0
declare hidden void @external_void_func_f64_inreg(double inreg) #0
declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0

declare hidden void @external_void_func_p0_inreg(ptr inreg) #0
declare hidden void @external_void_func_p1_inreg(ptr addrspace(1) inreg) #0
declare hidden void @external_void_func_p3_inreg(ptr addrspace(3) inreg) #0
declare hidden void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg) #0
declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) #0

; amdgpu_gfx calling convention
declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0

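; A call from a kernel forwards the implicit SGPR inputs, re-packs the three
; workitem IDs into $vgpr31 (x | y << 10 | z << 20), and copies the scratch
; resource descriptor into $sgpr0_sgpr1_sgpr2_sgpr3.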
define amdgpu_kernel void @test_call_external_void_func_void() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_void
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_void()
  ret void
}

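; An amdgpu_gfx caller only forwards $sgpr0_sgpr1_sgpr2_sgpr3 and uses the
; csr_amdgpu_si_gfx register mask.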
define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
  ; CHECK-LABEL: name: test_gfx_call_external_void_func_void
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call amdgpu_gfx void @external_gfx_void_func_void()
  ret void
}

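; A non-kernel caller already receives the packed workitem IDs and implicit
; SGPR inputs, so they are forwarded to the callee unchanged.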
define void @test_func_call_external_void_func_void() #0 {
  ; CHECK-LABEL: name: test_func_call_external_void_func_void
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_void()
  ret void
}

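; An empty aggregate argument lowers to nothing; only the trailing i32 is
; passed in $vgpr0.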
define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_empty_struct
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_empty_struct({} zeroinitializer, i32 23)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_empty_array
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_empty_array([0 x i8] zeroinitializer, i32 23)
  ret void
}

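; An i1 argument is any-extended to 32 bits and passed in $vgpr0.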
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i1_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s1)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_i1(i1 true)
  ret void
}

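; With the signext/zeroext attributes the i1 argument is sign- or
; zero-extended to 32 bits instead (see also the zeroext test below).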
define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i1_signext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1)
  ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i1, ptr addrspace(1) undef
  call void @external_void_func_i1_signext(i1 signext %var)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i1_zeroext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i1, ptr addrspace(1) undef
  call void @external_void_func_i1_zeroext(i1 zeroext %var)
  ret void
}

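; An i8 argument is extended in two steps, s8 -> s16 -> s32, before being
; passed in $vgpr0.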
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i8_imm
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[C]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_i8(i8 123)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i8_signext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD]](s8)
  ; CHECK-NEXT:   [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[SEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i8, ptr addrspace(1) undef
  call void @external_void_func_i8_signext(i8 signext %var)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i8_zeroext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[LOAD]](s8)
  ; CHECK-NEXT:   [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ZEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i8, ptr addrspace(1) undef
  call void @external_void_func_i8_zeroext(i8 zeroext %var)
  ret void
}

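; An i16 argument is extended directly to s32 and passed in $vgpr0.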
define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i16_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_i16(i16 123)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i16_signext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[SEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i16, ptr addrspace(1) undef
  call void @external_void_func_i16_signext(i16 signext %var)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i16_zeroext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ZEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i16, ptr addrspace(1) undef
  call void @external_void_func_i16_zeroext(i16 zeroext %var)
  ret void
}

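; An i32 argument is passed directly in $vgpr0.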
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i32_imm
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_i32(i32 42)
  ret void
}

define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
  ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $vgpr0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
  ret void
}

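; With the amdgpu_gfx calling convention, an inreg i32 argument is passed in $sgpr4 rather than a VGPR.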
define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
  ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr4
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
  ; CHECK-NEXT:   $sgpr4 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i64_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_i64(i64 123)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i64
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <2 x i64>, ptr addrspace(1) null
  call void @external_void_func_v2i64(<2 x i64> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i64_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
  ret void
}

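; i48 arguments are promoted to i64 (G_ANYEXT here, G_SEXT/G_ZEXT in the signext/zeroext variants below) and split across $vgpr0/$vgpr1.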
define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i48
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i48, ptr addrspace(1) undef
  call void @external_void_func_i48(i48 %var)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i48_signext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i48, ptr addrspace(1) undef
  call void @external_void_func_i48_signext(i48 signext %var)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i48_zeroext
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %var = load volatile i48, ptr addrspace(1) undef
  call void @external_void_func_i48_zeroext(i48 zeroext %var)
  ret void
}

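; A flat pointer (p0) argument is unmerged into two 32-bit halves in $vgpr0/$vgpr1.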
define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_p0_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p0)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_p0(ptr %arg)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2p0
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x p0>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <2 x ptr>, ptr addrspace(1) null
  call void @external_void_func_v2p0(<2 x ptr> %val)
  ret void
}

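; <3 x i64> and <4 x i64> arguments are assembled with G_SHUFFLE_VECTOR from a loaded <2 x i64> plus constants, then unmerged into one VGPR per 32-bit piece.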
define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3i64
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<3 x s64>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %load = load <2 x i64>, ptr addrspace(1) null
  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
  call void @external_void_func_v3i64(<3 x i64> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4i64
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SHUF]](<4 x s64>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %load = load <2 x i64>, ptr addrspace(1) null
  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @external_void_func_v4i64(<4 x i64> %val)
  ret void
}

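; A half immediate is widened with G_ANYEXT to 32 bits before being placed in $vgpr0.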
define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_f16_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_f16(half 4.0)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_f32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_f32(float 4.0)
  ret void
}

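; Float vector immediates are built with G_BUILD_VECTOR and unmerged into one VGPR per element.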
define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2f32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3f32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
  ret void
}

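; <5 x float> immediate is unmerged and passed in vgpr0-vgpr4.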
define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v5f32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
  ret void
}

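; double immediate is split into two 32-bit halves passed in vgpr0-vgpr1.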
define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_f64_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_f64(double 4.0)
  ret void
}

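; <2 x double> is unmerged into four 32-bit pieces in vgpr0-vgpr3.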
define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2f64_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
  ret void
}

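; <3 x double> is unmerged into six 32-bit pieces in vgpr0-vgpr5.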
define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3f64_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s64>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
  ret void
}

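; <2 x i16> is passed packed in a single VGPR.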
define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <2 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v2i16(<2 x i16> %val)
  ret void
}

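; <3 x i16> is widened to <4 x s16> with an undef high element and passed as two <2 x s16> pieces.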
define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16)
  ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV3]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV4]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <3 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v3i16(<3 x i16> %val)
  ret void
}

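; <3 x half> is widened to <4 x s16> the same way as <3 x i16>.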
define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF3]](s16)
  ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV3]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV4]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <3 x half>, ptr addrspace(1) undef
  call void @external_void_func_v3f16(<3 x half> %val)
  ret void
}

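; <4 x i16> is passed as two <2 x s16> pieces in vgpr0-vgpr1.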
define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <4 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v4i16(<4 x i16> %val)
  ret void
}

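; Same lowering as above, but with a constant <4 x i16> built from G_CONSTANTs.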
define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4i16_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 3
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
  ret void
}

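; <5 x i16> is widened to <6 x s16> and passed as three <2 x s16> pieces.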
define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v5i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>)
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF3]](s16)
  ; CHECK-NEXT:   [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV5]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV6]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV7]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <5 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v5i16(<5 x i16> %val)
  ret void
}

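; <7 x i16> is widened to <8 x s16> and passed as four <2 x s16> pieces.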
define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v7i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>)
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF3]](s16)
  ; CHECK-NEXT:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV7]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV8]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV9]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV10]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <7 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v7i16(<7 x i16> %val)
  ret void
}

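; <63 x i16> is widened to <64 x s16>; 31 <2 x s16> pieces go in vgpr0-vgpr30 and the last piece is stored to the stack.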
define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v63i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>)
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF3]](s16)
  ; CHECK-NEXT:   [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV63]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV64]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV65]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV66]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV67]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV68]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV69]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV70]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV71]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV72]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV73]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV74]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV75]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV76]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV77]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV78]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV79]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV80]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV81]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV82]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV83]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV84]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV85]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV86]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV87]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV88]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV89]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV90]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV91]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV92]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV93]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <63 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v63i16(<63 x i16> %val)
  ret void
}

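; <65 x i16> is widened to <66 x s16>; pieces beyond vgpr0-vgpr30 are passed on the stack, as in the v63i16 case.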
define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v65i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>)
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF3]](s16)
  ; CHECK-NEXT:   [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
  ; CHECK-NEXT:   G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV65]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV66]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV67]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV68]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV69]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV70]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV71]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV72]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV73]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV74]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV75]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV76]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV77]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV78]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV79]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV80]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV81]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV82]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV83]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV84]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV85]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV86]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV87]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV88]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV89]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV90]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV91]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV92]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV93]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV94]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV95]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <65 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v65i16(<65 x i16> %val)
  ret void
}

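; <66 x i16> splits directly into 33 <2 x s16> pieces; as in the widened
; <65 x i16> case above, the final two pieces are passed on the stack.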
define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v66i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
  ; CHECK-NEXT:   G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](<2 x s16>)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <66 x i16>, ptr addrspace(1) undef
  call void @external_void_func_v66i16(<66 x i16> %val)
  ret void
}

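; <2 x half> fits in a single VGPR, so the loaded value is passed directly in $vgpr0.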
define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   $vgpr0 = COPY [[LOAD]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <2 x half>, ptr addrspace(1) undef
  call void @external_void_func_v2f16(<2 x half> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <2 x i32>, ptr addrspace(1) undef
  call void @external_void_func_v2i32(<2 x i32> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C2]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
  ret void
}

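; The unused explicit i32 kernel argument offsets the implicit kernarg pointer
; passed to the callee by 4 bytes (G_PTR_ADD with i64 4).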
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3i32_imm
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3i32_i32
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[C3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = load <4 x i32>, ptr addrspace(1) undef
  call void @external_void_func_v4i32(<4 x i32> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4i32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C4]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C6]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v5i32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C5]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C6]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C7]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v8i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i32>, ptr addrspace(1) %ptr
  call void @external_void_func_v8i32(<8 x i32> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v8i32_imm
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C8]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C9]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C10]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v16i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i32>, ptr addrspace(1) %ptr
  call void @external_void_func_v16i32(<16 x i32> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v32i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <32 x i32>, ptr addrspace(1) %ptr
  call void @external_void_func_v32i32(<32 x i32> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i32
  ; CHECK: bb.1 (%ir-block.1):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
  ; CHECK-NEXT:   G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF2]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF3]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
  %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
  %val1 = load i32, ptr addrspace(1) undef
  call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v32i32_i8_i8_i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
  ; CHECK-NEXT:   G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32)
  ; CHECK-NEXT:   G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
  ; CHECK-NEXT:   G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF2]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF3]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 16, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
  %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
  %val1 = load i8, ptr addrspace(1) undef
  %val2 = load i8, ptr addrspace(1) undef
  %val3 = load i16, ptr addrspace(1) undef
  call void @external_void_func_v32i32_i8_i8_i16(<32 x i32> %val0, i8 %val1, i8 %val2, i16 %val3)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v32i32_p3_p5
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
  ; CHECK-NEXT:   G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32)
  ; CHECK-NEXT:   G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF2]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF3]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
  %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
  %val1 = load ptr addrspace(3), ptr addrspace(1) undef
  %val2 = load ptr addrspace(5), ptr addrspace(1) undef
  call void @external_void_func_v32i32_p3_p5(<32 x i32> %val0, ptr addrspace(3) %val1, ptr addrspace(5) %val2)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_struct_i8_i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: ("amdgpu-noclobber" load (s32) from %ir.ptr0 + 4, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[LOAD2]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
  call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
  ret void
}

define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
  ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT1]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[LOAD2]](s32)
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
  ret void
}

define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
  ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $sgpr4 = COPY [[ANYEXT1]](s32)
  ; CHECK-NEXT:   $sgpr5 = COPY [[LOAD2]](s32)
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
  call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_byval_struct_i8_i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   %14:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
  ; CHECK-NEXT:   G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.val, addrspace 5)
  ; CHECK-NEXT:   G_STORE [[C1]](s32), %14(p5) :: (store (s32) into %ir.gep1, addrspace 5)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C4]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C5]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %val = alloca { i8, i32 }, align 4, addrspace(5)
  %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
  store i8 3, ptr addrspace(5) %gep0
  store i32 8, ptr addrspace(5) %gep1
  call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val)
  ret void
}

declare void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %arg0, ptr addrspace(5) byval(i8) align 32 %arg1, i32 %arg2) #0

define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr addrspace(5) align 32 %incoming1) #0 {
  ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[C]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %incoming0, ptr addrspace(5) byval(i8) align 32 %incoming1, i32 999)
  ret void
}

declare void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %arg0) #0

; Make sure the memory operand records the higher alignment of the incoming
; value, not just the lower alignment implied by the outgoing byval argument.
define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align 256 %incoming_high_align) #0 {
  ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
  ; CHECK-NEXT:   G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 32, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %incoming_high_align)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<2 x s8>)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT2]](s32)
  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT3]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <2 x i8>, ptr addrspace(1) %ptr
  call void @external_void_func_v2i8(<2 x i8> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<3 x s8>)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT3]](s32)
  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT4]](s32)
  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT5]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <3 x i8>, ptr addrspace(1) %ptr
  call void @external_void_func_v3i8(<3 x i8> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT4]](s32)
  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT5]](s32)
  ; CHECK-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT6]](s32)
  ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
  ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT7]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <4 x i8>, ptr addrspace(1) %ptr
  call void @external_void_func_v4i8(<4 x i8> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v8i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<8 x s8>)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
  ; CHECK-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
  ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
  ; CHECK-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT8]](s32)
  ; CHECK-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT9]](s32)
  ; CHECK-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT10]](s32)
  ; CHECK-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
  ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT11]](s32)
  ; CHECK-NEXT:   [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16)
  ; CHECK-NEXT:   $vgpr4 = COPY [[ANYEXT12]](s32)
  ; CHECK-NEXT:   [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16)
  ; CHECK-NEXT:   $vgpr5 = COPY [[ANYEXT13]](s32)
  ; CHECK-NEXT:   [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16)
  ; CHECK-NEXT:   $vgpr6 = COPY [[ANYEXT14]](s32)
  ; CHECK-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
  ; CHECK-NEXT:   $vgpr7 = COPY [[ANYEXT15]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i8>, ptr addrspace(1) %ptr
  call void @external_void_func_v8i8(<8 x i8> %val)
  ret void
}

define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v16i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C1]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8)
  ; CHECK-NEXT:   [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8)
  ; CHECK-NEXT:   [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8)
  ; CHECK-NEXT:   [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8)
  ; CHECK-NEXT:   [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8)
  ; CHECK-NEXT:   [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8)
  ; CHECK-NEXT:   [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8)
  ; CHECK-NEXT:   [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8)
  ; CHECK-NEXT:   [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8)
  ; CHECK-NEXT:   [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8)
  ; CHECK-NEXT:   [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8)
  ; CHECK-NEXT:   [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8)
  ; CHECK-NEXT:   [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8)
  ; CHECK-NEXT:   [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8)
  ; CHECK-NEXT:   [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8)
  ; CHECK-NEXT:   [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8)
  ; CHECK-NEXT:   [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
  ; CHECK-NEXT:   $vgpr0 = COPY [[ANYEXT16]](s32)
  ; CHECK-NEXT:   [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16)
  ; CHECK-NEXT:   $vgpr1 = COPY [[ANYEXT17]](s32)
  ; CHECK-NEXT:   [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16)
  ; CHECK-NEXT:   $vgpr2 = COPY [[ANYEXT18]](s32)
  ; CHECK-NEXT:   [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16)
  ; CHECK-NEXT:   $vgpr3 = COPY [[ANYEXT19]](s32)
  ; CHECK-NEXT:   [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16)
  ; CHECK-NEXT:   $vgpr4 = COPY [[ANYEXT20]](s32)
  ; CHECK-NEXT:   [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16)
  ; CHECK-NEXT:   $vgpr5 = COPY [[ANYEXT21]](s32)
  ; CHECK-NEXT:   [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16)
  ; CHECK-NEXT:   $vgpr6 = COPY [[ANYEXT22]](s32)
  ; CHECK-NEXT:   [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16)
  ; CHECK-NEXT:   $vgpr7 = COPY [[ANYEXT23]](s32)
  ; CHECK-NEXT:   [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16)
  ; CHECK-NEXT:   $vgpr8 = COPY [[ANYEXT24]](s32)
  ; CHECK-NEXT:   [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16)
  ; CHECK-NEXT:   $vgpr9 = COPY [[ANYEXT25]](s32)
  ; CHECK-NEXT:   [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16)
  ; CHECK-NEXT:   $vgpr10 = COPY [[ANYEXT26]](s32)
  ; CHECK-NEXT:   [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16)
  ; CHECK-NEXT:   $vgpr11 = COPY [[ANYEXT27]](s32)
  ; CHECK-NEXT:   [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16)
  ; CHECK-NEXT:   $vgpr12 = COPY [[ANYEXT28]](s32)
  ; CHECK-NEXT:   [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16)
  ; CHECK-NEXT:   $vgpr13 = COPY [[ANYEXT29]](s32)
  ; CHECK-NEXT:   [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16)
  ; CHECK-NEXT:   $vgpr14 = COPY [[ANYEXT30]](s32)
  ; CHECK-NEXT:   [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16)
  ; CHECK-NEXT:   $vgpr15 = COPY [[ANYEXT31]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF1]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i8>, ptr addrspace(1) %ptr
  call void @external_void_func_v16i8(<16 x i8> %val)
  ret void
}

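; The 32 i32 elements use vgpr0-vgpr30 plus one stack slot; the f64 is split into
; two i32 stores at stack offsets 4 and 8.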
define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
  ; CHECK-LABEL: name: stack_passed_arg_alignment_v32i32_f64
  ; CHECK: bb.1.entry:
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4)
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
  ; CHECK-NEXT:   %17:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %17(p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32)
  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY12]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF1]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
entry:
  call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
  ret void
}

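; 12 x <3 x i32> = 36 i32 values: the first 31 go in vgpr0-vgpr30, the remaining 5
; are stored to the stack (20 bytes).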
define void @stack_12xv3i32() #0 {
  ; CHECK-LABEL: name: stack_12xv3i32
  ; CHECK: bb.1.entry:
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32)
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32)
  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32)
  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
  ; CHECK-NEXT:   [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32)
  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32)
  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
  ; CHECK-NEXT:   [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
  ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
  ; CHECK-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
  ; CHECK-NEXT:   [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
  ; CHECK-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
  ; CHECK-NEXT:   [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
  ; CHECK-NEXT:   [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
  ; CHECK-NEXT:   [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
  ; CHECK-NEXT:   [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32)
  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32)
  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32)
  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32)
  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 20, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
entry:
  call void @external_void_func_12xv3i32(
      <3 x i32> <i32 0, i32 0, i32 0>,
      <3 x i32> <i32 1, i32 1, i32 1>,
      <3 x i32> <i32 2, i32 2, i32 2>,
      <3 x i32> <i32 3, i32 3, i32 3>,
      <3 x i32> <i32 4, i32 4, i32 4>,
      <3 x i32> <i32 5, i32 5, i32 5>,
      <3 x i32> <i32 6, i32 6, i32 6>,
      <3 x i32> <i32 7, i32 7, i32 7>,
      <3 x i32> <i32 8, i32 8, i32 8>,
      <3 x i32> <i32 9, i32 9, i32 9>,
      <3 x i32> <i32 10, i32 11, i32 12>,
      <3 x i32> <i32 13, i32 14, i32 15>)
  ret void
}

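; Same argument layout as stack_12xv3i32, with float constants instead of i32.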
define void @stack_12xv3f32() #0 {
  ; CHECK-LABEL: name: stack_12xv3f32
  ; CHECK: bb.1.entry:
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C6]](s32), [[C6]](s32)
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C7]](s32), [[C7]](s32)
  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR8:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C8]](s32), [[C8]](s32), [[C8]](s32)
  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR9:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C9]](s32), [[C9]](s32), [[C9]](s32)
  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
  ; CHECK-NEXT:   [[BUILD_VECTOR10:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C10]](s32), [[C11]](s32), [[C12]](s32)
  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01
  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
  ; CHECK-NEXT:   [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
  ; CHECK-NEXT:   [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
  ; CHECK-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
  ; CHECK-NEXT:   [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
  ; CHECK-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
  ; CHECK-NEXT:   [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
  ; CHECK-NEXT:   [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
  ; CHECK-NEXT:   [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
  ; CHECK-NEXT:   [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32)
  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32)
  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32)
  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32)
  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 20, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
entry:
  call void @external_void_func_12xv3f32(
      <3 x float> <float 0.0, float 0.0, float 0.0>,
      <3 x float> <float 1.0, float 1.0, float 1.0>,
      <3 x float> <float 2.0, float 2.0, float 2.0>,
      <3 x float> <float 3.0, float 3.0, float 3.0>,
      <3 x float> <float 4.0, float 4.0, float 4.0>,
      <3 x float> <float 5.0, float 5.0, float 5.0>,
      <3 x float> <float 6.0, float 6.0, float 6.0>,
      <3 x float> <float 7.0, float 7.0, float 7.0>,
      <3 x float> <float 8.0, float 8.0, float 8.0>,
      <3 x float> <float 9.0, float 9.0, float 9.0>,
      <3 x float> <float 10.0, float 11.0, float 12.0>,
      <3 x float> <float 13.0, float 14.0, float 15.0>)
  ret void
}

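; 8 x <5 x i32> = 40 i32 values: the first 31 go in vgpr0-vgpr30, the rest are
; stored to the stack.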
define void @stack_8xv5i32() #0 {
  ; CHECK-LABEL: name: stack_8xv5i32
  ; CHECK: bb.1.entry:
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32)
  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
  ; CHECK-NEXT:   [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
  ; CHECK-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
  ; CHECK-NEXT:   [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
  ; CHECK-NEXT:   [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32)
  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32)
  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32)
  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
  ; CHECK-NEXT:   [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>)
  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32)
  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](s32)
  ; CHECK-NEXT:   G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5)
  ; CHECK-NEXT:   [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](s32)
  ; CHECK-NEXT:   G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](s32)
  ; CHECK-NEXT:   G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5)
  ; CHECK-NEXT:   [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
  ; CHECK-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32)
  ; CHECK-NEXT:   G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
entry:
  call void @external_void_func_8xv5i32(
      <5 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0>,
      <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1>,
      <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2>,
      <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3>,
      <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>,
      <5 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5>,
      <5 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10>,
      <5 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15>)
  ret void
}

define void @stack_8xv5f32() #0 {
  ; CHECK-LABEL: name: stack_8xv5f32
  ; CHECK: bb.1.entry:
  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 3.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
  ; CHECK-NEXT:   [[BUILD_VECTOR5:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 6.000000e+00
  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.000000e+00
  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 8.000000e+00
  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 9.000000e+00
  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+01
  ; CHECK-NEXT:   [[BUILD_VECTOR6:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C6]](s32), [[C7]](s32), [[C8]](s32), [[C9]](s32), [[C10]](s32)
  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.100000e+01
  ; CHECK-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.200000e+01
  ; CHECK-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.300000e+01
  ; CHECK-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.400000e+01
  ; CHECK-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
  ; CHECK-NEXT:   [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
  ; CHECK-NEXT:   [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
  ; CHECK-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
  ; CHECK-NEXT:   [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
  ; CHECK-NEXT:   [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
  ; CHECK-NEXT:   [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
  ; CHECK-NEXT:   [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
  ; CHECK-NEXT:   [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
  ; CHECK-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C16]](s32)
  ; CHECK-NEXT:   G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C17]](s32)
  ; CHECK-NEXT:   G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
  ; CHECK-NEXT:   [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C18]](s32)
  ; CHECK-NEXT:   G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C19]](s32)
  ; CHECK-NEXT:   G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
  ; CHECK-NEXT:   [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>)
  ; CHECK-NEXT:   [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C20]](s32)
  ; CHECK-NEXT:   G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
  ; CHECK-NEXT:   [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C21]](s32)
  ; CHECK-NEXT:   G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5)
  ; CHECK-NEXT:   [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C22]](s32)
  ; CHECK-NEXT:   G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5)
  ; CHECK-NEXT:   [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
  ; CHECK-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C23]](s32)
  ; CHECK-NEXT:   G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5)
  ; CHECK-NEXT:   [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
  ; CHECK-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C24]](s32)
  ; CHECK-NEXT:   G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
  ; CHECK-NEXT:   $vgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $vgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $vgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $vgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   $vgpr4 = COPY [[UV4]](s32)
  ; CHECK-NEXT:   $vgpr5 = COPY [[UV5]](s32)
  ; CHECK-NEXT:   $vgpr6 = COPY [[UV6]](s32)
  ; CHECK-NEXT:   $vgpr7 = COPY [[UV7]](s32)
  ; CHECK-NEXT:   $vgpr8 = COPY [[UV8]](s32)
  ; CHECK-NEXT:   $vgpr9 = COPY [[UV9]](s32)
  ; CHECK-NEXT:   $vgpr10 = COPY [[UV10]](s32)
  ; CHECK-NEXT:   $vgpr11 = COPY [[UV11]](s32)
  ; CHECK-NEXT:   $vgpr12 = COPY [[UV12]](s32)
  ; CHECK-NEXT:   $vgpr13 = COPY [[UV13]](s32)
  ; CHECK-NEXT:   $vgpr14 = COPY [[UV14]](s32)
  ; CHECK-NEXT:   $vgpr15 = COPY [[UV15]](s32)
  ; CHECK-NEXT:   $vgpr16 = COPY [[UV16]](s32)
  ; CHECK-NEXT:   $vgpr17 = COPY [[UV17]](s32)
  ; CHECK-NEXT:   $vgpr18 = COPY [[UV18]](s32)
  ; CHECK-NEXT:   $vgpr19 = COPY [[UV19]](s32)
  ; CHECK-NEXT:   $vgpr20 = COPY [[UV20]](s32)
  ; CHECK-NEXT:   $vgpr21 = COPY [[UV21]](s32)
  ; CHECK-NEXT:   $vgpr22 = COPY [[UV22]](s32)
  ; CHECK-NEXT:   $vgpr23 = COPY [[UV23]](s32)
  ; CHECK-NEXT:   $vgpr24 = COPY [[UV24]](s32)
  ; CHECK-NEXT:   $vgpr25 = COPY [[UV25]](s32)
  ; CHECK-NEXT:   $vgpr26 = COPY [[UV26]](s32)
  ; CHECK-NEXT:   $vgpr27 = COPY [[UV27]](s32)
  ; CHECK-NEXT:   $vgpr28 = COPY [[UV28]](s32)
  ; CHECK-NEXT:   $vgpr29 = COPY [[UV29]](s32)
  ; CHECK-NEXT:   $vgpr30 = COPY [[UV30]](s32)
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 36, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
entry:
  call void @external_void_func_8xv5f32(
      <5 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
      <5 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
      <5 x float> <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
      <5 x float> <float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
      <5 x float> <float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
      <5 x float> <float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
      <5 x float> <float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
      <5 x float> <float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
  ret void
}
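
; Call of a null callee using the default calling convention from an
; amdgpu_ps entry point.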

define amdgpu_ps void @amdgpu_ps_call_default_cc() {
  ; CHECK-LABEL: name: amdgpu_ps_call_default_cc
  ; CHECK: bb.1.main_body:
  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4)
  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p4) = COPY [[C1]](p4)
  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[DEF]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY1]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[DEF1]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[DEF2]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY2]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY3]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY4]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY5]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
main_body:
  call void null()
  ret void
}
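
; inreg arguments (passed in SGPRs)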

define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i16_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
  ; CHECK-NEXT:   $sgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_i16_inreg(i16 inreg %arg)
  ret void
}

define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i32_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY9]](s32)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_i32_inreg(i32 inreg %arg)
  ret void
}

define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_i64_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_i64_inreg(i64 inreg %arg)
  ret void
}

define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2i32_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_v2i32_inreg(<2 x i32> inreg %arg)
  ret void
}

define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_f16_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
  ; CHECK-NEXT:   $sgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_f16_inreg(half inreg %arg)
  ret void
}

define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_bf16_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_bf16_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
  ; CHECK-NEXT:   $sgpr0 = COPY [[ANYEXT]](s32)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_bf16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_bf16_inreg(bfloat inreg %arg)
  ret void
}

define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_f32_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY9]](s32)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_f32_inreg(float inreg %arg)
  ret void
}

define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_f64_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_f64_inreg(double inreg %arg)
  ret void
}

define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2f16_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr6
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY9]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_v2f16_inreg(<2 x half> inreg %arg)
  ret void
}

define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v3f16_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr7
  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>)
  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16)
  ; CHECK-NEXT:   [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV7]](<2 x s16>)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV8]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_v3f16_inreg(<3 x half> inreg %arg)
  ret void
}

define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v4f16_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $sgpr7
  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4f16_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](<2 x s16>)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](<2 x s16>)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_v4f16_inreg(<4 x half> inreg %arg)
  ret void
}

define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_p0_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p0)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_p0_inreg(ptr inreg %arg)
  ret void
}

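; Same as the p0 case above, but for a global (addrspace(1)) pointer.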
define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_p1_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p1_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
  ret void
}

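; A 32-bit LDS (addrspace(3)) pointer passed inreg needs only one SGPR:
; it arrives in $sgpr6 and is forwarded to the callee in $sgpr0.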
define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_p3_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p3) = COPY $sgpr6
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p3_inreg
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY9]](p3)
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p3_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
  ret void
}

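; A <2 x p1> inreg argument occupies four SGPRs: the incoming pieces arrive in
; $sgpr6, $sgpr7, $sgpr16 and $sgpr17 and are forwarded in $sgpr0-$sgpr3.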
define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2p1_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr16
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $sgpr17
  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
  ; CHECK-NEXT:   [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32)
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p1_inreg
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   $sgpr2 = COPY [[UV2]](s32)
  ; CHECK-NEXT:   $sgpr3 = COPY [[UV3]](s32)
  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY14]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY15]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY16]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY20]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY21]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg)
  ret void
}

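; A <2 x p5> inreg argument uses one SGPR per 32-bit private pointer:
; incoming in $sgpr6/$sgpr7, forwarded in $sgpr0/$sgpr1.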
define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 {
  ; CHECK-LABEL: name: test_call_external_void_func_v2p5_inreg
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr6, $sgpr7, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p5) = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p5) = COPY $sgpr7
  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p5>) = G_BUILD_VECTOR [[COPY9]](p5), [[COPY10]](p5)
  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p5_inreg
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>)
  ; CHECK-NEXT:   $sgpr0 = COPY [[UV]](s32)
  ; CHECK-NEXT:   $sgpr1 = COPY [[UV1]](s32)
  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY15]](s32)
  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY16]](s32)
  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY17]](s32)
  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY18]](s32)
  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY19]](s32)
  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p5_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   SI_RETURN
  call void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg)
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}