llvm/llvm/test/CodeGen/AMDGPU/max-hard-clause-length.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 | FileCheck %s -check-prefixes=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s -check-prefix=GFX11
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12

;; Check that consecutive store operations are grouped greedily into
;; hard clauses of the appropriate length for each target.
;; This test uses <4 x i32> stores in order to prevent the stores from
;; being combined into larger operations due to their adjacency.
define amdgpu_kernel void @long_store_chain(ptr addrspace(1) %p) {
; GFX10-LABEL: long_store_chain:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX10-NEXT:    s_mov_b32 s0, 0
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_mov_b32 s1, s0
; GFX10-NEXT:    s_mov_b32 s2, s0
; GFX10-NEXT:    s_mov_b32 s3, s0
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_mov_b32_e32 v2, s2
; GFX10-NEXT:    v_mov_b32_e32 v3, s3
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:16
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:32
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:48
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:64
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:80
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:96
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:112
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:128
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:144
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:160
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:176
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:192
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:208
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:224
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:240
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:256
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:272
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:288
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:304
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:320
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:336
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:352
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:368
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:384
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:400
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:416
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:432
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:448
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:464
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:480
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:496
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:512
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:528
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:544
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:560
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:576
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:592
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:608
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:624
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:640
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:656
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:672
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:688
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:704
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:720
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:736
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:752
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:768
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:784
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:800
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:816
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:832
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:848
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:864
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:880
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:896
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:912
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:928
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:944
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:960
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:976
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:992
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:1008
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:1024
; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5] offset:1040
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: long_store_chain:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    s_mov_b32 s2, s0
; GFX11-NEXT:    s_mov_b32 s3, s0
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_mov_b32_e32 v2, s2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5]
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:16
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:32
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:48
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:64
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:80
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:96
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:112
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:128
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:144
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:160
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:176
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:192
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:208
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:224
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:240
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:256
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:272
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:288
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:304
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:320
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:336
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:352
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:368
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:384
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:400
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:416
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:432
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:448
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:464
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:480
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:496
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:512
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:528
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:544
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:560
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:576
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:592
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:608
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:624
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:640
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:656
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:672
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:688
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:704
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:720
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:736
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:752
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:768
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:784
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:800
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:816
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:832
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:848
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:864
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:880
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:896
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:912
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:928
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:944
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:960
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:976
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:992
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1008
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1024
; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1040
; GFX11-NEXT:    s_nop 0
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: long_store_chain:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[4:5], s[2:3], 0x24
; GFX12-NEXT:    s_mov_b32 s0, 0
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-NEXT:    s_mov_b32 s1, s0
; GFX12-NEXT:    s_mov_b32 s2, s0
; GFX12-NEXT:    s_mov_b32 s3, s0
; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT:    v_mov_b32_e32 v2, s2
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_clause 0x1f
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5]
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:16
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:32
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:48
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:64
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:80
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:96
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:112
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:128
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:144
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:160
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:176
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:192
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:208
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:224
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:240
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:256
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:272
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:288
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:304
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:320
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:336
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:352
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:368
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:384
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:400
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:416
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:432
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:448
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:464
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:480
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:496
; GFX12-NEXT:    s_clause 0x1f
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:512
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:528
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:544
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:560
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:576
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:592
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:608
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:624
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:640
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:656
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:672
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:688
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:704
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:720
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:736
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:752
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:768
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:784
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:800
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:816
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:832
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:848
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:864
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:880
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:896
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:912
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:928
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:944
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:960
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:976
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:992
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1008
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1024
; GFX12-NEXT:    global_store_b128 v4, v[0:3], s[4:5] offset:1040
; GFX12-NEXT:    s_nop 0
; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT:    s_endpgm
  ;; Body: 66 stores of <4 x i32> zeroinitializer, one to %p and one to each of
  ;; %ptr1..%ptr65 (getelementptr <4 x i32> indices 1..65, which correspond to
  ;; the checked byte offsets 16..1040). In the checks above, the gfx1100 and
  ;; gfx1200 output groups these stores under s_clause 0x1f, s_clause 0x1f and
  ;; s_clause 0x1 (32 + 32 + 2 stores), while the gfx1030 output contains no
  ;; s_clause at all.
  store <4 x i32> zeroinitializer, ptr addrspace(1) %p
  %ptr1 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 1
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr1
  %ptr2 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 2
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr2
  %ptr3 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 3
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr3
  %ptr4 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 4
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr4
  %ptr5 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 5
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr5
  %ptr6 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 6
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr6
  %ptr7 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 7
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr7
  %ptr8 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 8
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr8
  %ptr9 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 9
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr9
  %ptr10 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 10
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr10
  %ptr11 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 11
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr11
  %ptr12 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 12
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr12
  %ptr13 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 13
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr13
  %ptr14 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 14
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr14
  %ptr15 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 15
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr15
  %ptr16 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 16
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr16
  %ptr17 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 17
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr17
  %ptr18 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 18
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr18
  %ptr19 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 19
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr19
  %ptr20 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 20
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr20
  %ptr21 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 21
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr21
  %ptr22 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 22
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr22
  %ptr23 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 23
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr23
  %ptr24 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 24
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr24
  %ptr25 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 25
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr25
  %ptr26 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 26
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr26
  %ptr27 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 27
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr27
  %ptr28 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 28
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr28
  %ptr29 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 29
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr29
  %ptr30 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 30
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr30
  %ptr31 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 31
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr31
  %ptr32 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 32
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr32
  %ptr33 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 33
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr33
  %ptr34 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 34
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr34
  %ptr35 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 35
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr35
  %ptr36 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 36
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr36
  %ptr37 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 37
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr37
  %ptr38 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 38
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr38
  %ptr39 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 39
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr39
  %ptr40 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 40
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr40
  %ptr41 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 41
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr41
  %ptr42 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 42
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr42
  %ptr43 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 43
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr43
  %ptr44 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 44
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr44
  %ptr45 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 45
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr45
  %ptr46 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 46
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr46
  %ptr47 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 47
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr47
  %ptr48 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 48
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr48
  %ptr49 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 49
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr49
  %ptr50 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 50
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr50
  %ptr51 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 51
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr51
  %ptr52 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 52
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr52
  %ptr53 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 53
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr53
  %ptr54 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 54
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr54
  %ptr55 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 55
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr55
  %ptr56 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 56
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr56
  %ptr57 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 57
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr57
  %ptr58 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 58
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr58
  %ptr59 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 59
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr59
  %ptr60 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 60
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr60
  %ptr61 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 61
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr61
  %ptr62 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 62
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr62
  %ptr63 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 63
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr63
  %ptr64 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 64
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr64
  %ptr65 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 65
  store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr65
  ret void
}

;; Use a long chain of loads here, since gfx10 doesn't cluster stores.
;; Use i32 loads to save on register pressure.
define amdgpu_kernel void @long_load_chain(ptr addrspace(1) %p) {
; GFX10-LABEL: long_load_chain:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_clause 0x3e
; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
; GFX10-NEXT:    s_load_dword s3, s[0:1], 0x10
; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x20
; GFX10-NEXT:    s_load_dword s5, s[0:1], 0x30
; GFX10-NEXT:    s_load_dword s6, s[0:1], 0x40
; GFX10-NEXT:    s_load_dword s7, s[0:1], 0x50
; GFX10-NEXT:    s_load_dword s8, s[0:1], 0x60
; GFX10-NEXT:    s_load_dword s9, s[0:1], 0x70
; GFX10-NEXT:    s_load_dword s10, s[0:1], 0x80
; GFX10-NEXT:    s_load_dword s11, s[0:1], 0x90
; GFX10-NEXT:    s_load_dword s12, s[0:1], 0xa0
; GFX10-NEXT:    s_load_dword s13, s[0:1], 0xb0
; GFX10-NEXT:    s_load_dword s14, s[0:1], 0xc0
; GFX10-NEXT:    s_load_dword s15, s[0:1], 0xd0
; GFX10-NEXT:    s_load_dword s16, s[0:1], 0xe0
; GFX10-NEXT:    s_load_dword s17, s[0:1], 0xf0
; GFX10-NEXT:    s_load_dword s18, s[0:1], 0x100
; GFX10-NEXT:    s_load_dword s19, s[0:1], 0x110
; GFX10-NEXT:    s_load_dword s20, s[0:1], 0x120
; GFX10-NEXT:    s_load_dword s21, s[0:1], 0x130
; GFX10-NEXT:    s_load_dword s22, s[0:1], 0x140
; GFX10-NEXT:    s_load_dword s23, s[0:1], 0x150
; GFX10-NEXT:    s_load_dword s24, s[0:1], 0x160
; GFX10-NEXT:    s_load_dword s25, s[0:1], 0x170
; GFX10-NEXT:    s_load_dword s26, s[0:1], 0x180
; GFX10-NEXT:    s_load_dword s27, s[0:1], 0x190
; GFX10-NEXT:    s_load_dword s28, s[0:1], 0x1a0
; GFX10-NEXT:    s_load_dword s29, s[0:1], 0x1b0
; GFX10-NEXT:    s_load_dword s30, s[0:1], 0x1c0
; GFX10-NEXT:    s_load_dword s31, s[0:1], 0x1d0
; GFX10-NEXT:    s_load_dword s33, s[0:1], 0x1e0
; GFX10-NEXT:    s_load_dword s34, s[0:1], 0x1f0
; GFX10-NEXT:    s_load_dword s35, s[0:1], 0x200
; GFX10-NEXT:    s_load_dword s36, s[0:1], 0x210
; GFX10-NEXT:    s_load_dword s37, s[0:1], 0x220
; GFX10-NEXT:    s_load_dword s38, s[0:1], 0x230
; GFX10-NEXT:    s_load_dword s39, s[0:1], 0x240
; GFX10-NEXT:    s_load_dword s40, s[0:1], 0x250
; GFX10-NEXT:    s_load_dword s41, s[0:1], 0x260
; GFX10-NEXT:    s_load_dword s42, s[0:1], 0x270
; GFX10-NEXT:    s_load_dword s43, s[0:1], 0x280
; GFX10-NEXT:    s_load_dword s44, s[0:1], 0x290
; GFX10-NEXT:    s_load_dword s45, s[0:1], 0x2a0
; GFX10-NEXT:    s_load_dword s46, s[0:1], 0x2b0
; GFX10-NEXT:    s_load_dword s47, s[0:1], 0x2c0
; GFX10-NEXT:    s_load_dword s48, s[0:1], 0x2d0
; GFX10-NEXT:    s_load_dword s49, s[0:1], 0x2e0
; GFX10-NEXT:    s_load_dword s50, s[0:1], 0x2f0
; GFX10-NEXT:    s_load_dword s51, s[0:1], 0x300
; GFX10-NEXT:    s_load_dword s52, s[0:1], 0x310
; GFX10-NEXT:    s_load_dword s53, s[0:1], 0x320
; GFX10-NEXT:    s_load_dword s54, s[0:1], 0x330
; GFX10-NEXT:    s_load_dword s55, s[0:1], 0x340
; GFX10-NEXT:    s_load_dword s56, s[0:1], 0x350
; GFX10-NEXT:    s_load_dword s57, s[0:1], 0x360
; GFX10-NEXT:    s_load_dword s58, s[0:1], 0x370
; GFX10-NEXT:    s_load_dword s59, s[0:1], 0x380
; GFX10-NEXT:    s_load_dword s60, s[0:1], 0x390
; GFX10-NEXT:    s_load_dword s61, s[0:1], 0x3a0
; GFX10-NEXT:    s_load_dword s62, s[0:1], 0x3b0
; GFX10-NEXT:    s_load_dword s63, s[0:1], 0x3c0
; GFX10-NEXT:    s_load_dword s64, s[0:1], 0x3d0
; GFX10-NEXT:    s_load_dword s65, s[0:1], 0x3e0
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dword s66, s[0:1], 0x3f0
; GFX10-NEXT:    s_load_dword s67, s[0:1], 0x400
; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x410
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s2
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s3
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s4
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s5
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s6
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s7
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s8
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s9
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s10
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s11
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s12
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s13
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s14
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s15
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s16
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s17
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s18
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s19
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s20
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s21
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s22
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s23
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s24
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s25
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s26
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s27
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s28
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s29
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s30
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s35
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s36
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s37
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s38
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s39
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s41
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s42
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s43
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s44
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s45
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s46
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s47
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s48
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s49
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s50
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s51
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s52
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s53
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s54
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s55
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s56
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s57
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s58
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s59
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s60
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s61
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s62
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s63
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s64
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s65
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s66
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s67
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s0
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: long_load_chain:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x0
; GFX11-NEXT:    s_load_b32 s3, s[0:1], 0x10
; GFX11-NEXT:    s_load_b32 s4, s[0:1], 0x20
; GFX11-NEXT:    s_load_b32 s5, s[0:1], 0x30
; GFX11-NEXT:    s_load_b32 s6, s[0:1], 0x40
; GFX11-NEXT:    s_load_b32 s7, s[0:1], 0x50
; GFX11-NEXT:    s_load_b32 s8, s[0:1], 0x60
; GFX11-NEXT:    s_load_b32 s9, s[0:1], 0x70
; GFX11-NEXT:    s_load_b32 s10, s[0:1], 0x80
; GFX11-NEXT:    s_load_b32 s11, s[0:1], 0x90
; GFX11-NEXT:    s_load_b32 s12, s[0:1], 0xa0
; GFX11-NEXT:    s_load_b32 s13, s[0:1], 0xb0
; GFX11-NEXT:    s_load_b32 s14, s[0:1], 0xc0
; GFX11-NEXT:    s_load_b32 s15, s[0:1], 0xd0
; GFX11-NEXT:    s_load_b32 s16, s[0:1], 0xe0
; GFX11-NEXT:    s_load_b32 s17, s[0:1], 0xf0
; GFX11-NEXT:    s_load_b32 s18, s[0:1], 0x100
; GFX11-NEXT:    s_load_b32 s19, s[0:1], 0x110
; GFX11-NEXT:    s_load_b32 s20, s[0:1], 0x120
; GFX11-NEXT:    s_load_b32 s21, s[0:1], 0x130
; GFX11-NEXT:    s_load_b32 s22, s[0:1], 0x140
; GFX11-NEXT:    s_load_b32 s23, s[0:1], 0x150
; GFX11-NEXT:    s_load_b32 s24, s[0:1], 0x160
; GFX11-NEXT:    s_load_b32 s25, s[0:1], 0x170
; GFX11-NEXT:    s_load_b32 s26, s[0:1], 0x180
; GFX11-NEXT:    s_load_b32 s27, s[0:1], 0x190
; GFX11-NEXT:    s_load_b32 s28, s[0:1], 0x1a0
; GFX11-NEXT:    s_load_b32 s29, s[0:1], 0x1b0
; GFX11-NEXT:    s_load_b32 s30, s[0:1], 0x1c0
; GFX11-NEXT:    s_load_b32 s31, s[0:1], 0x1d0
; GFX11-NEXT:    s_load_b32 s33, s[0:1], 0x1e0
; GFX11-NEXT:    s_load_b32 s34, s[0:1], 0x1f0
; GFX11-NEXT:    s_clause 0x1f
; GFX11-NEXT:    s_load_b32 s35, s[0:1], 0x200
; GFX11-NEXT:    s_load_b32 s36, s[0:1], 0x210
; GFX11-NEXT:    s_load_b32 s37, s[0:1], 0x220
; GFX11-NEXT:    s_load_b32 s38, s[0:1], 0x230
; GFX11-NEXT:    s_load_b32 s39, s[0:1], 0x240
; GFX11-NEXT:    s_load_b32 s40, s[0:1], 0x250
; GFX11-NEXT:    s_load_b32 s41, s[0:1], 0x260
; GFX11-NEXT:    s_load_b32 s42, s[0:1], 0x270
; GFX11-NEXT:    s_load_b32 s43, s[0:1], 0x280
; GFX11-NEXT:    s_load_b32 s44, s[0:1], 0x290
; GFX11-NEXT:    s_load_b32 s45, s[0:1], 0x2a0
; GFX11-NEXT:    s_load_b32 s46, s[0:1], 0x2b0
; GFX11-NEXT:    s_load_b32 s47, s[0:1], 0x2c0
; GFX11-NEXT:    s_load_b32 s48, s[0:1], 0x2d0
; GFX11-NEXT:    s_load_b32 s49, s[0:1], 0x2e0
; GFX11-NEXT:    s_load_b32 s50, s[0:1], 0x2f0
; GFX11-NEXT:    s_load_b32 s51, s[0:1], 0x300
; GFX11-NEXT:    s_load_b32 s52, s[0:1], 0x310
; GFX11-NEXT:    s_load_b32 s53, s[0:1], 0x320
; GFX11-NEXT:    s_load_b32 s54, s[0:1], 0x330
; GFX11-NEXT:    s_load_b32 s55, s[0:1], 0x340
; GFX11-NEXT:    s_load_b32 s56, s[0:1], 0x350
; GFX11-NEXT:    s_load_b32 s57, s[0:1], 0x360
; GFX11-NEXT:    s_load_b32 s58, s[0:1], 0x370
; GFX11-NEXT:    s_load_b32 s59, s[0:1], 0x380
; GFX11-NEXT:    s_load_b32 s60, s[0:1], 0x390
; GFX11-NEXT:    s_load_b32 s61, s[0:1], 0x3a0
; GFX11-NEXT:    s_load_b32 s62, s[0:1], 0x3b0
; GFX11-NEXT:    s_load_b32 s63, s[0:1], 0x3c0
; GFX11-NEXT:    s_load_b32 s64, s[0:1], 0x3d0
; GFX11-NEXT:    s_load_b32 s65, s[0:1], 0x3e0
; GFX11-NEXT:    s_load_b32 s66, s[0:1], 0x3f0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s67, s[0:1], 0x400
; GFX11-NEXT:    s_load_b32 s0, s[0:1], 0x410
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s2
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s3
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s4
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s5
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s6
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s7
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s8
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s9
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s10
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s11
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s12
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s13
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s14
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s15
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s16
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s17
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s18
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s19
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s20
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s21
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s22
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s23
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s24
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s25
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s26
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s27
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s28
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s29
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s30
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s33
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s34
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s35
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s36
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s37
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s38
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s39
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s41
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s42
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s43
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s44
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s45
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s46
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s47
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s48
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s49
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s50
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s51
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s52
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s53
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s54
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s55
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s56
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s57
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s58
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s59
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s60
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s61
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s62
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s63
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s64
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s65
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s66
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s67
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s0
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: long_load_chain:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[2:3], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_clause 0x1f
; GFX12-NEXT:    s_load_b32 s2, s[0:1], 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x10
; GFX12-NEXT:    s_load_b32 s4, s[0:1], 0x20
; GFX12-NEXT:    s_load_b32 s5, s[0:1], 0x30
; GFX12-NEXT:    s_load_b32 s6, s[0:1], 0x40
; GFX12-NEXT:    s_load_b32 s7, s[0:1], 0x50
; GFX12-NEXT:    s_load_b32 s8, s[0:1], 0x60
; GFX12-NEXT:    s_load_b32 s9, s[0:1], 0x70
; GFX12-NEXT:    s_load_b32 s10, s[0:1], 0x80
; GFX12-NEXT:    s_load_b32 s11, s[0:1], 0x90
; GFX12-NEXT:    s_load_b32 s12, s[0:1], 0xa0
; GFX12-NEXT:    s_load_b32 s13, s[0:1], 0xb0
; GFX12-NEXT:    s_load_b32 s14, s[0:1], 0xc0
; GFX12-NEXT:    s_load_b32 s15, s[0:1], 0xd0
; GFX12-NEXT:    s_load_b32 s16, s[0:1], 0xe0
; GFX12-NEXT:    s_load_b32 s17, s[0:1], 0xf0
; GFX12-NEXT:    s_load_b32 s18, s[0:1], 0x100
; GFX12-NEXT:    s_load_b32 s19, s[0:1], 0x110
; GFX12-NEXT:    s_load_b32 s20, s[0:1], 0x120
; GFX12-NEXT:    s_load_b32 s21, s[0:1], 0x130
; GFX12-NEXT:    s_load_b32 s22, s[0:1], 0x140
; GFX12-NEXT:    s_load_b32 s23, s[0:1], 0x150
; GFX12-NEXT:    s_load_b32 s24, s[0:1], 0x160
; GFX12-NEXT:    s_load_b32 s25, s[0:1], 0x170
; GFX12-NEXT:    s_load_b32 s26, s[0:1], 0x180
; GFX12-NEXT:    s_load_b32 s27, s[0:1], 0x190
; GFX12-NEXT:    s_load_b32 s28, s[0:1], 0x1a0
; GFX12-NEXT:    s_load_b32 s29, s[0:1], 0x1b0
; GFX12-NEXT:    s_load_b32 s30, s[0:1], 0x1c0
; GFX12-NEXT:    s_load_b32 s31, s[0:1], 0x1d0
; GFX12-NEXT:    s_load_b32 s33, s[0:1], 0x1e0
; GFX12-NEXT:    s_load_b32 s34, s[0:1], 0x1f0
; GFX12-NEXT:    s_clause 0x1f
; GFX12-NEXT:    s_load_b32 s35, s[0:1], 0x200
; GFX12-NEXT:    s_load_b32 s36, s[0:1], 0x210
; GFX12-NEXT:    s_load_b32 s37, s[0:1], 0x220
; GFX12-NEXT:    s_load_b32 s38, s[0:1], 0x230
; GFX12-NEXT:    s_load_b32 s39, s[0:1], 0x240
; GFX12-NEXT:    s_load_b32 s40, s[0:1], 0x250
; GFX12-NEXT:    s_load_b32 s41, s[0:1], 0x260
; GFX12-NEXT:    s_load_b32 s42, s[0:1], 0x270
; GFX12-NEXT:    s_load_b32 s43, s[0:1], 0x280
; GFX12-NEXT:    s_load_b32 s44, s[0:1], 0x290
; GFX12-NEXT:    s_load_b32 s45, s[0:1], 0x2a0
; GFX12-NEXT:    s_load_b32 s46, s[0:1], 0x2b0
; GFX12-NEXT:    s_load_b32 s47, s[0:1], 0x2c0
; GFX12-NEXT:    s_load_b32 s48, s[0:1], 0x2d0
; GFX12-NEXT:    s_load_b32 s49, s[0:1], 0x2e0
; GFX12-NEXT:    s_load_b32 s50, s[0:1], 0x2f0
; GFX12-NEXT:    s_load_b32 s51, s[0:1], 0x300
; GFX12-NEXT:    s_load_b32 s52, s[0:1], 0x310
; GFX12-NEXT:    s_load_b32 s53, s[0:1], 0x320
; GFX12-NEXT:    s_load_b32 s54, s[0:1], 0x330
; GFX12-NEXT:    s_load_b32 s55, s[0:1], 0x340
; GFX12-NEXT:    s_load_b32 s56, s[0:1], 0x350
; GFX12-NEXT:    s_load_b32 s57, s[0:1], 0x360
; GFX12-NEXT:    s_load_b32 s58, s[0:1], 0x370
; GFX12-NEXT:    s_load_b32 s59, s[0:1], 0x380
; GFX12-NEXT:    s_load_b32 s60, s[0:1], 0x390
; GFX12-NEXT:    s_load_b32 s61, s[0:1], 0x3a0
; GFX12-NEXT:    s_load_b32 s62, s[0:1], 0x3b0
; GFX12-NEXT:    s_load_b32 s63, s[0:1], 0x3c0
; GFX12-NEXT:    s_load_b32 s64, s[0:1], 0x3d0
; GFX12-NEXT:    s_load_b32 s65, s[0:1], 0x3e0
; GFX12-NEXT:    s_load_b32 s66, s[0:1], 0x3f0
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    s_load_b32 s67, s[0:1], 0x400
; GFX12-NEXT:    s_load_b32 s0, s[0:1], 0x410
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s2
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s3
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s4
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s5
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s6
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s7
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s8
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s9
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s10
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s11
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s12
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s13
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s14
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s15
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s16
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s17
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s18
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s19
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s20
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s21
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s22
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s23
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s24
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s25
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s26
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s27
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s28
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s29
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s30
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s31
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s33
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s34
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s35
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s36
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s37
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s38
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s39
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s40
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s41
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s42
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s43
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s44
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s45
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s46
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s47
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s48
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s49
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s50
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s51
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s52
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s53
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s54
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s55
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s56
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s57
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s58
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s59
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s60
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s61
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s62
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s63
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s64
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s65
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s66
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s67
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    ;;#ASMSTART
; GFX12-NEXT:    ; use s0
; GFX12-NEXT:    ;;#ASMEND
; GFX12-NEXT:    s_endpgm
  %v0 = load i32, ptr addrspace(1) %p
  %ptr1 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 1
  %v1 = load i32, ptr addrspace(1) %ptr1
  %ptr2 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 2
  %v2 = load i32, ptr addrspace(1) %ptr2
  %ptr3 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 3
  %v3 = load i32, ptr addrspace(1) %ptr3
  %ptr4 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 4
  %v4 = load i32, ptr addrspace(1) %ptr4
  %ptr5 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 5
  %v5 = load i32, ptr addrspace(1) %ptr5
  %ptr6 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 6
  %v6 = load i32, ptr addrspace(1) %ptr6
  %ptr7 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 7
  %v7 = load i32, ptr addrspace(1) %ptr7
  %ptr8 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 8
  %v8 = load i32, ptr addrspace(1) %ptr8
  %ptr9 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 9
  %v9 = load i32, ptr addrspace(1) %ptr9
  %ptr10 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 10
  %v10 = load i32, ptr addrspace(1) %ptr10
  %ptr11 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 11
  %v11 = load i32, ptr addrspace(1) %ptr11
  %ptr12 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 12
  %v12 = load i32, ptr addrspace(1) %ptr12
  %ptr13 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 13
  %v13 = load i32, ptr addrspace(1) %ptr13
  %ptr14 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 14
  %v14 = load i32, ptr addrspace(1) %ptr14
  %ptr15 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 15
  %v15 = load i32, ptr addrspace(1) %ptr15
  %ptr16 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 16
  %v16 = load i32, ptr addrspace(1) %ptr16
  %ptr17 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 17
  %v17 = load i32, ptr addrspace(1) %ptr17
  %ptr18 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 18
  %v18 = load i32, ptr addrspace(1) %ptr18
  %ptr19 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 19
  %v19 = load i32, ptr addrspace(1) %ptr19
  %ptr20 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 20
  %v20 = load i32, ptr addrspace(1) %ptr20
  %ptr21 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 21
  %v21 = load i32, ptr addrspace(1) %ptr21
  %ptr22 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 22
  %v22 = load i32, ptr addrspace(1) %ptr22
  %ptr23 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 23
  %v23 = load i32, ptr addrspace(1) %ptr23
  %ptr24 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 24
  %v24 = load i32, ptr addrspace(1) %ptr24
  %ptr25 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 25
  %v25 = load i32, ptr addrspace(1) %ptr25
  %ptr26 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 26
  %v26 = load i32, ptr addrspace(1) %ptr26
  %ptr27 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 27
  %v27 = load i32, ptr addrspace(1) %ptr27
  %ptr28 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 28
  %v28 = load i32, ptr addrspace(1) %ptr28
  %ptr29 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 29
  %v29 = load i32, ptr addrspace(1) %ptr29
  %ptr30 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 30
  %v30 = load i32, ptr addrspace(1) %ptr30
  %ptr31 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 31
  %v31 = load i32, ptr addrspace(1) %ptr31
  %ptr32 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 32
  %v32 = load i32, ptr addrspace(1) %ptr32
  %ptr33 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 33
  %v33 = load i32, ptr addrspace(1) %ptr33
  %ptr34 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 34
  %v34 = load i32, ptr addrspace(1) %ptr34
  %ptr35 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 35
  %v35 = load i32, ptr addrspace(1) %ptr35
  %ptr36 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 36
  %v36 = load i32, ptr addrspace(1) %ptr36
  %ptr37 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 37
  %v37 = load i32, ptr addrspace(1) %ptr37
  %ptr38 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 38
  %v38 = load i32, ptr addrspace(1) %ptr38
  %ptr39 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 39
  %v39 = load i32, ptr addrspace(1) %ptr39
  %ptr40 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 40
  %v40 = load i32, ptr addrspace(1) %ptr40
  %ptr41 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 41
  %v41 = load i32, ptr addrspace(1) %ptr41
  %ptr42 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 42
  %v42 = load i32, ptr addrspace(1) %ptr42
  %ptr43 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 43
  %v43 = load i32, ptr addrspace(1) %ptr43
  %ptr44 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 44
  %v44 = load i32, ptr addrspace(1) %ptr44
  %ptr45 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 45
  %v45 = load i32, ptr addrspace(1) %ptr45
  %ptr46 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 46
  %v46 = load i32, ptr addrspace(1) %ptr46
  %ptr47 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 47
  %v47 = load i32, ptr addrspace(1) %ptr47
  %ptr48 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 48
  %v48 = load i32, ptr addrspace(1) %ptr48
  %ptr49 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 49
  %v49 = load i32, ptr addrspace(1) %ptr49
  %ptr50 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 50
  %v50 = load i32, ptr addrspace(1) %ptr50
  %ptr51 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 51
  %v51 = load i32, ptr addrspace(1) %ptr51
  %ptr52 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 52
  %v52 = load i32, ptr addrspace(1) %ptr52
  %ptr53 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 53
  %v53 = load i32, ptr addrspace(1) %ptr53
  %ptr54 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 54
  %v54 = load i32, ptr addrspace(1) %ptr54
  %ptr55 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 55
  %v55 = load i32, ptr addrspace(1) %ptr55
  %ptr56 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 56
  %v56 = load i32, ptr addrspace(1) %ptr56
  %ptr57 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 57
  %v57 = load i32, ptr addrspace(1) %ptr57
  %ptr58 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 58
  %v58 = load i32, ptr addrspace(1) %ptr58
  %ptr59 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 59
  %v59 = load i32, ptr addrspace(1) %ptr59
  %ptr60 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 60
  %v60 = load i32, ptr addrspace(1) %ptr60
  %ptr61 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 61
  %v61 = load i32, ptr addrspace(1) %ptr61
  %ptr62 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 62
  %v62 = load i32, ptr addrspace(1) %ptr62
  %ptr63 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 63
  %v63 = load i32, ptr addrspace(1) %ptr63
  %ptr64 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 64
  %v64 = load i32, ptr addrspace(1) %ptr64
  %ptr65 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 65
  %v65 = load i32, ptr addrspace(1) %ptr65

  call void asm sideeffect "; use $0", "s"(i32 %v0)
  call void asm sideeffect "; use $0", "s"(i32 %v1)
  call void asm sideeffect "; use $0", "s"(i32 %v2)
  call void asm sideeffect "; use $0", "s"(i32 %v3)
  call void asm sideeffect "; use $0", "s"(i32 %v4)
  call void asm sideeffect "; use $0", "s"(i32 %v5)
  call void asm sideeffect "; use $0", "s"(i32 %v6)
  call void asm sideeffect "; use $0", "s"(i32 %v7)
  call void asm sideeffect "; use $0", "s"(i32 %v8)
  call void asm sideeffect "; use $0", "s"(i32 %v9)
  call void asm sideeffect "; use $0", "s"(i32 %v10)
  call void asm sideeffect "; use $0", "s"(i32 %v11)
  call void asm sideeffect "; use $0", "s"(i32 %v12)
  call void asm sideeffect "; use $0", "s"(i32 %v13)
  call void asm sideeffect "; use $0", "s"(i32 %v14)
  call void asm sideeffect "; use $0", "s"(i32 %v15)
  call void asm sideeffect "; use $0", "s"(i32 %v16)
  call void asm sideeffect "; use $0", "s"(i32 %v17)
  call void asm sideeffect "; use $0", "s"(i32 %v18)
  call void asm sideeffect "; use $0", "s"(i32 %v19)
  call void asm sideeffect "; use $0", "s"(i32 %v20)
  call void asm sideeffect "; use $0", "s"(i32 %v21)
  call void asm sideeffect "; use $0", "s"(i32 %v22)
  call void asm sideeffect "; use $0", "s"(i32 %v23)
  call void asm sideeffect "; use $0", "s"(i32 %v24)
  call void asm sideeffect "; use $0", "s"(i32 %v25)
  call void asm sideeffect "; use $0", "s"(i32 %v26)
  call void asm sideeffect "; use $0", "s"(i32 %v27)
  call void asm sideeffect "; use $0", "s"(i32 %v28)
  call void asm sideeffect "; use $0", "s"(i32 %v29)
  call void asm sideeffect "; use $0", "s"(i32 %v30)
  call void asm sideeffect "; use $0", "s"(i32 %v31)
  call void asm sideeffect "; use $0", "s"(i32 %v32)
  call void asm sideeffect "; use $0", "s"(i32 %v33)
  call void asm sideeffect "; use $0", "s"(i32 %v34)
  call void asm sideeffect "; use $0", "s"(i32 %v35)
  call void asm sideeffect "; use $0", "s"(i32 %v36)
  call void asm sideeffect "; use $0", "s"(i32 %v37)
  call void asm sideeffect "; use $0", "s"(i32 %v38)
  call void asm sideeffect "; use $0", "s"(i32 %v39)
  call void asm sideeffect "; use $0", "s"(i32 %v40)
  call void asm sideeffect "; use $0", "s"(i32 %v41)
  call void asm sideeffect "; use $0", "s"(i32 %v42)
  call void asm sideeffect "; use $0", "s"(i32 %v43)
  call void asm sideeffect "; use $0", "s"(i32 %v44)
  call void asm sideeffect "; use $0", "s"(i32 %v45)
  call void asm sideeffect "; use $0", "s"(i32 %v46)
  call void asm sideeffect "; use $0", "s"(i32 %v47)
  call void asm sideeffect "; use $0", "s"(i32 %v48)
  call void asm sideeffect "; use $0", "s"(i32 %v49)
  call void asm sideeffect "; use $0", "s"(i32 %v50)
  call void asm sideeffect "; use $0", "s"(i32 %v51)
  call void asm sideeffect "; use $0", "s"(i32 %v52)
  call void asm sideeffect "; use $0", "s"(i32 %v53)
  call void asm sideeffect "; use $0", "s"(i32 %v54)
  call void asm sideeffect "; use $0", "s"(i32 %v55)
  call void asm sideeffect "; use $0", "s"(i32 %v56)
  call void asm sideeffect "; use $0", "s"(i32 %v57)
  call void asm sideeffect "; use $0", "s"(i32 %v58)
  call void asm sideeffect "; use $0", "s"(i32 %v59)
  call void asm sideeffect "; use $0", "s"(i32 %v60)
  call void asm sideeffect "; use $0", "s"(i32 %v61)
  call void asm sideeffect "; use $0", "s"(i32 %v62)
  call void asm sideeffect "; use $0", "s"(i32 %v63)
  call void asm sideeffect "; use $0", "s"(i32 %v64)
  call void asm sideeffect "; use $0", "s"(i32 %v65)

  ret void
}