; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 | FileCheck %s -check-prefixes=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s -check-prefix=GFX11
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefix=GFX12
;; Check that consecutive store operations are grouped greedily into
;; hard clauses of the appropriate length for each target.
;; This test uses <4 x i32> stores in order to prevent the stores from
;; being combined into larger operations due to their adjacency.
define amdgpu_kernel void @long_store_chain(ptr addrspace(1) %p) {
; GFX10-LABEL: long_store_chain:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX10-NEXT: s_mov_b32 s0, 0
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_mov_b32 s1, s0
; GFX10-NEXT: s_mov_b32 s2, s0
; GFX10-NEXT: s_mov_b32 s3, s0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:16
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:32
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:48
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:64
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:80
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:96
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:112
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:128
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:144
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:160
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:176
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:192
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:208
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:224
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:240
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:256
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:272
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:288
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:304
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:320
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:336
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:352
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:368
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:384
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:400
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:416
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:432
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:448
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:464
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:480
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:496
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:512
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:528
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:544
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:560
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:576
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:592
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:608
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:624
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:640
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:656
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:672
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:688
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:704
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:720
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:736
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:752
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:768
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:784
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:800
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:816
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:832
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:848
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:864
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:880
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:896
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:912
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:928
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:944
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:960
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:976
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:992
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:1008
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:1024
; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] offset:1040
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: long_store_chain:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s1, s0
; GFX11-NEXT: s_mov_b32 s2, s0
; GFX11-NEXT: s_mov_b32 s3, s0
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5]
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:16
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:32
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:48
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:64
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:80
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:96
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:112
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:128
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:144
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:160
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:176
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:192
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:208
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:224
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:240
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:256
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:272
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:288
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:304
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:320
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:336
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:352
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:368
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:384
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:400
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:416
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:432
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:448
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:464
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:480
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:496
; GFX11-NEXT: s_clause 0x1f
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:512
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:528
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:544
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:560
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:576
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:592
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:608
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:624
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:640
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:656
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:672
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:688
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:704
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:720
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:736
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:752
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:768
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:784
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:800
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:816
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:832
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:848
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:864
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:880
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:896
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:912
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:928
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:944
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:960
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:976
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:992
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:1008
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:1024
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:1040
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: long_store_chain:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b64 s[4:5], s[2:3], 0x24
; GFX12-NEXT: s_mov_b32 s0, 0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_mov_b32 s1, s0
; GFX12-NEXT: s_mov_b32 s2, s0
; GFX12-NEXT: s_mov_b32 s3, s0
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: v_mov_b32_e32 v2, s2
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_clause 0x1f
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5]
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:16
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:32
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:48
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:64
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:80
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:96
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:112
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:128
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:144
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:160
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:176
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:192
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:208
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:224
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:240
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:256
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:272
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:288
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:304
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:320
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:336
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:352
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:368
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:384
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:400
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:416
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:432
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:448
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:464
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:480
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:496
; GFX12-NEXT: s_clause 0x1f
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:512
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:528
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:544
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:560
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:576
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:592
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:608
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:624
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:640
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:656
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:672
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:688
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:704
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:720
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:736
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:752
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:768
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:784
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:800
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:816
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:832
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:848
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:864
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:880
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:896
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:912
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:928
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:944
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:960
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:976
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:992
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:1008
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:1024
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[4:5] offset:1040
; GFX12-NEXT: s_nop 0
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-NEXT: s_endpgm
;; Test body: 66 stores of an all-zero <4 x i32> value. The first store goes
;; to %p itself; each later store goes to element N (N = 1..65) of %p indexed
;; as an array of <4 x i32>, which appears in the checks above as byte
;; offsets 16 through 1040.
store <4 x i32> zeroinitializer, ptr addrspace(1) %p
%ptr1 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 1
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr1
%ptr2 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 2
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr2
%ptr3 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 3
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr3
%ptr4 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 4
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr4
%ptr5 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 5
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr5
%ptr6 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 6
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr6
%ptr7 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 7
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr7
%ptr8 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 8
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr8
%ptr9 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 9
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr9
%ptr10 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 10
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr10
%ptr11 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 11
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr11
%ptr12 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 12
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr12
%ptr13 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 13
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr13
%ptr14 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 14
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr14
%ptr15 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 15
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr15
%ptr16 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 16
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr16
%ptr17 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 17
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr17
%ptr18 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 18
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr18
%ptr19 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 19
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr19
%ptr20 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 20
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr20
%ptr21 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 21
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr21
%ptr22 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 22
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr22
%ptr23 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 23
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr23
%ptr24 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 24
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr24
%ptr25 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 25
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr25
%ptr26 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 26
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr26
%ptr27 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 27
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr27
%ptr28 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 28
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr28
%ptr29 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 29
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr29
%ptr30 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 30
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr30
%ptr31 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 31
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr31
%ptr32 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 32
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr32
%ptr33 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 33
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr33
%ptr34 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 34
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr34
%ptr35 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 35
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr35
%ptr36 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 36
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr36
%ptr37 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 37
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr37
%ptr38 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 38
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr38
%ptr39 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 39
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr39
%ptr40 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 40
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr40
%ptr41 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 41
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr41
%ptr42 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 42
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr42
%ptr43 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 43
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr43
%ptr44 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 44
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr44
%ptr45 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 45
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr45
%ptr46 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 46
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr46
%ptr47 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 47
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr47
%ptr48 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 48
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr48
%ptr49 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 49
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr49
%ptr50 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 50
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr50
%ptr51 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 51
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr51
%ptr52 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 52
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr52
%ptr53 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 53
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr53
%ptr54 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 54
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr54
%ptr55 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 55
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr55
%ptr56 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 56
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr56
%ptr57 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 57
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr57
%ptr58 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 58
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr58
%ptr59 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 59
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr59
%ptr60 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 60
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr60
%ptr61 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 61
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr61
%ptr62 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 62
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr62
%ptr63 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 63
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr63
%ptr64 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 64
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr64
%ptr65 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 65
store <4 x i32> zeroinitializer, ptr addrspace(1) %ptr65
ret void
}
;; Use a long chain of loads here, since gfx10 doesn't cluster stores.
;; Use i32 loads to reduce register pressure.
define amdgpu_kernel void @long_load_chain(ptr addrspace(1) %p) {
; GFX10-LABEL: long_load_chain:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x3e
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX10-NEXT: s_load_dword s3, s[0:1], 0x10
; GFX10-NEXT: s_load_dword s4, s[0:1], 0x20
; GFX10-NEXT: s_load_dword s5, s[0:1], 0x30
; GFX10-NEXT: s_load_dword s6, s[0:1], 0x40
; GFX10-NEXT: s_load_dword s7, s[0:1], 0x50
; GFX10-NEXT: s_load_dword s8, s[0:1], 0x60
; GFX10-NEXT: s_load_dword s9, s[0:1], 0x70
; GFX10-NEXT: s_load_dword s10, s[0:1], 0x80
; GFX10-NEXT: s_load_dword s11, s[0:1], 0x90
; GFX10-NEXT: s_load_dword s12, s[0:1], 0xa0
; GFX10-NEXT: s_load_dword s13, s[0:1], 0xb0
; GFX10-NEXT: s_load_dword s14, s[0:1], 0xc0
; GFX10-NEXT: s_load_dword s15, s[0:1], 0xd0
; GFX10-NEXT: s_load_dword s16, s[0:1], 0xe0
; GFX10-NEXT: s_load_dword s17, s[0:1], 0xf0
; GFX10-NEXT: s_load_dword s18, s[0:1], 0x100
; GFX10-NEXT: s_load_dword s19, s[0:1], 0x110
; GFX10-NEXT: s_load_dword s20, s[0:1], 0x120
; GFX10-NEXT: s_load_dword s21, s[0:1], 0x130
; GFX10-NEXT: s_load_dword s22, s[0:1], 0x140
; GFX10-NEXT: s_load_dword s23, s[0:1], 0x150
; GFX10-NEXT: s_load_dword s24, s[0:1], 0x160
; GFX10-NEXT: s_load_dword s25, s[0:1], 0x170
; GFX10-NEXT: s_load_dword s26, s[0:1], 0x180
; GFX10-NEXT: s_load_dword s27, s[0:1], 0x190
; GFX10-NEXT: s_load_dword s28, s[0:1], 0x1a0
; GFX10-NEXT: s_load_dword s29, s[0:1], 0x1b0
; GFX10-NEXT: s_load_dword s30, s[0:1], 0x1c0
; GFX10-NEXT: s_load_dword s31, s[0:1], 0x1d0
; GFX10-NEXT: s_load_dword s33, s[0:1], 0x1e0
; GFX10-NEXT: s_load_dword s34, s[0:1], 0x1f0
; GFX10-NEXT: s_load_dword s35, s[0:1], 0x200
; GFX10-NEXT: s_load_dword s36, s[0:1], 0x210
; GFX10-NEXT: s_load_dword s37, s[0:1], 0x220
; GFX10-NEXT: s_load_dword s38, s[0:1], 0x230
; GFX10-NEXT: s_load_dword s39, s[0:1], 0x240
; GFX10-NEXT: s_load_dword s40, s[0:1], 0x250
; GFX10-NEXT: s_load_dword s41, s[0:1], 0x260
; GFX10-NEXT: s_load_dword s42, s[0:1], 0x270
; GFX10-NEXT: s_load_dword s43, s[0:1], 0x280
; GFX10-NEXT: s_load_dword s44, s[0:1], 0x290
; GFX10-NEXT: s_load_dword s45, s[0:1], 0x2a0
; GFX10-NEXT: s_load_dword s46, s[0:1], 0x2b0
; GFX10-NEXT: s_load_dword s47, s[0:1], 0x2c0
; GFX10-NEXT: s_load_dword s48, s[0:1], 0x2d0
; GFX10-NEXT: s_load_dword s49, s[0:1], 0x2e0
; GFX10-NEXT: s_load_dword s50, s[0:1], 0x2f0
; GFX10-NEXT: s_load_dword s51, s[0:1], 0x300
; GFX10-NEXT: s_load_dword s52, s[0:1], 0x310
; GFX10-NEXT: s_load_dword s53, s[0:1], 0x320
; GFX10-NEXT: s_load_dword s54, s[0:1], 0x330
; GFX10-NEXT: s_load_dword s55, s[0:1], 0x340
; GFX10-NEXT: s_load_dword s56, s[0:1], 0x350
; GFX10-NEXT: s_load_dword s57, s[0:1], 0x360
; GFX10-NEXT: s_load_dword s58, s[0:1], 0x370
; GFX10-NEXT: s_load_dword s59, s[0:1], 0x380
; GFX10-NEXT: s_load_dword s60, s[0:1], 0x390
; GFX10-NEXT: s_load_dword s61, s[0:1], 0x3a0
; GFX10-NEXT: s_load_dword s62, s[0:1], 0x3b0
; GFX10-NEXT: s_load_dword s63, s[0:1], 0x3c0
; GFX10-NEXT: s_load_dword s64, s[0:1], 0x3d0
; GFX10-NEXT: s_load_dword s65, s[0:1], 0x3e0
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dword s66, s[0:1], 0x3f0
; GFX10-NEXT: s_load_dword s67, s[0:1], 0x400
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x410
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s2
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s3
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s4
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s5
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s6
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s7
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s8
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s9
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s10
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s11
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s12
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s13
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s14
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s15
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s16
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s17
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s18
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s19
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s20
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s21
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s22
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s23
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s24
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s25
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s26
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s27
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s28
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s29
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s30
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s33
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s34
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s35
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s36
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s37
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s38
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s39
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s41
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s42
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s43
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s44
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s45
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s46
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s47
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s48
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s49
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s50
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s51
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s52
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s53
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s54
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s55
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s56
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s57
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s58
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s59
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s60
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s61
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s62
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s63
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s64
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s65
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s66
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s67
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s0
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: long_load_chain:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1f
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0
; GFX11-NEXT: s_load_b32 s3, s[0:1], 0x10
; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x20
; GFX11-NEXT: s_load_b32 s5, s[0:1], 0x30
; GFX11-NEXT: s_load_b32 s6, s[0:1], 0x40
; GFX11-NEXT: s_load_b32 s7, s[0:1], 0x50
; GFX11-NEXT: s_load_b32 s8, s[0:1], 0x60
; GFX11-NEXT: s_load_b32 s9, s[0:1], 0x70
; GFX11-NEXT: s_load_b32 s10, s[0:1], 0x80
; GFX11-NEXT: s_load_b32 s11, s[0:1], 0x90
; GFX11-NEXT: s_load_b32 s12, s[0:1], 0xa0
; GFX11-NEXT: s_load_b32 s13, s[0:1], 0xb0
; GFX11-NEXT: s_load_b32 s14, s[0:1], 0xc0
; GFX11-NEXT: s_load_b32 s15, s[0:1], 0xd0
; GFX11-NEXT: s_load_b32 s16, s[0:1], 0xe0
; GFX11-NEXT: s_load_b32 s17, s[0:1], 0xf0
; GFX11-NEXT: s_load_b32 s18, s[0:1], 0x100
; GFX11-NEXT: s_load_b32 s19, s[0:1], 0x110
; GFX11-NEXT: s_load_b32 s20, s[0:1], 0x120
; GFX11-NEXT: s_load_b32 s21, s[0:1], 0x130
; GFX11-NEXT: s_load_b32 s22, s[0:1], 0x140
; GFX11-NEXT: s_load_b32 s23, s[0:1], 0x150
; GFX11-NEXT: s_load_b32 s24, s[0:1], 0x160
; GFX11-NEXT: s_load_b32 s25, s[0:1], 0x170
; GFX11-NEXT: s_load_b32 s26, s[0:1], 0x180
; GFX11-NEXT: s_load_b32 s27, s[0:1], 0x190
; GFX11-NEXT: s_load_b32 s28, s[0:1], 0x1a0
; GFX11-NEXT: s_load_b32 s29, s[0:1], 0x1b0
; GFX11-NEXT: s_load_b32 s30, s[0:1], 0x1c0
; GFX11-NEXT: s_load_b32 s31, s[0:1], 0x1d0
; GFX11-NEXT: s_load_b32 s33, s[0:1], 0x1e0
; GFX11-NEXT: s_load_b32 s34, s[0:1], 0x1f0
; GFX11-NEXT: s_clause 0x1f
; GFX11-NEXT: s_load_b32 s35, s[0:1], 0x200
; GFX11-NEXT: s_load_b32 s36, s[0:1], 0x210
; GFX11-NEXT: s_load_b32 s37, s[0:1], 0x220
; GFX11-NEXT: s_load_b32 s38, s[0:1], 0x230
; GFX11-NEXT: s_load_b32 s39, s[0:1], 0x240
; GFX11-NEXT: s_load_b32 s40, s[0:1], 0x250
; GFX11-NEXT: s_load_b32 s41, s[0:1], 0x260
; GFX11-NEXT: s_load_b32 s42, s[0:1], 0x270
; GFX11-NEXT: s_load_b32 s43, s[0:1], 0x280
; GFX11-NEXT: s_load_b32 s44, s[0:1], 0x290
; GFX11-NEXT: s_load_b32 s45, s[0:1], 0x2a0
; GFX11-NEXT: s_load_b32 s46, s[0:1], 0x2b0
; GFX11-NEXT: s_load_b32 s47, s[0:1], 0x2c0
; GFX11-NEXT: s_load_b32 s48, s[0:1], 0x2d0
; GFX11-NEXT: s_load_b32 s49, s[0:1], 0x2e0
; GFX11-NEXT: s_load_b32 s50, s[0:1], 0x2f0
; GFX11-NEXT: s_load_b32 s51, s[0:1], 0x300
; GFX11-NEXT: s_load_b32 s52, s[0:1], 0x310
; GFX11-NEXT: s_load_b32 s53, s[0:1], 0x320
; GFX11-NEXT: s_load_b32 s54, s[0:1], 0x330
; GFX11-NEXT: s_load_b32 s55, s[0:1], 0x340
; GFX11-NEXT: s_load_b32 s56, s[0:1], 0x350
; GFX11-NEXT: s_load_b32 s57, s[0:1], 0x360
; GFX11-NEXT: s_load_b32 s58, s[0:1], 0x370
; GFX11-NEXT: s_load_b32 s59, s[0:1], 0x380
; GFX11-NEXT: s_load_b32 s60, s[0:1], 0x390
; GFX11-NEXT: s_load_b32 s61, s[0:1], 0x3a0
; GFX11-NEXT: s_load_b32 s62, s[0:1], 0x3b0
; GFX11-NEXT: s_load_b32 s63, s[0:1], 0x3c0
; GFX11-NEXT: s_load_b32 s64, s[0:1], 0x3d0
; GFX11-NEXT: s_load_b32 s65, s[0:1], 0x3e0
; GFX11-NEXT: s_load_b32 s66, s[0:1], 0x3f0
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s67, s[0:1], 0x400
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x410
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s2
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s3
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s4
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s5
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s6
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s7
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s8
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s9
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s10
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s11
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s12
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s13
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s14
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s15
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s16
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s17
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s18
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s19
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s20
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s21
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s22
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s23
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s24
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s25
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s26
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s27
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s28
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s29
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s30
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s33
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s34
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s35
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s36
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s37
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s38
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s39
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s41
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s42
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s43
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s44
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s45
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s46
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s47
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s48
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s49
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s50
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s51
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s52
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s53
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s54
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s55
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s56
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s57
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s58
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s60
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s61
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s62
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s63
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s64
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s65
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s66
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s67
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: long_load_chain:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_clause 0x1f
; GFX12-NEXT: s_load_b32 s2, s[0:1], 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x10
; GFX12-NEXT: s_load_b32 s4, s[0:1], 0x20
; GFX12-NEXT: s_load_b32 s5, s[0:1], 0x30
; GFX12-NEXT: s_load_b32 s6, s[0:1], 0x40
; GFX12-NEXT: s_load_b32 s7, s[0:1], 0x50
; GFX12-NEXT: s_load_b32 s8, s[0:1], 0x60
; GFX12-NEXT: s_load_b32 s9, s[0:1], 0x70
; GFX12-NEXT: s_load_b32 s10, s[0:1], 0x80
; GFX12-NEXT: s_load_b32 s11, s[0:1], 0x90
; GFX12-NEXT: s_load_b32 s12, s[0:1], 0xa0
; GFX12-NEXT: s_load_b32 s13, s[0:1], 0xb0
; GFX12-NEXT: s_load_b32 s14, s[0:1], 0xc0
; GFX12-NEXT: s_load_b32 s15, s[0:1], 0xd0
; GFX12-NEXT: s_load_b32 s16, s[0:1], 0xe0
; GFX12-NEXT: s_load_b32 s17, s[0:1], 0xf0
; GFX12-NEXT: s_load_b32 s18, s[0:1], 0x100
; GFX12-NEXT: s_load_b32 s19, s[0:1], 0x110
; GFX12-NEXT: s_load_b32 s20, s[0:1], 0x120
; GFX12-NEXT: s_load_b32 s21, s[0:1], 0x130
; GFX12-NEXT: s_load_b32 s22, s[0:1], 0x140
; GFX12-NEXT: s_load_b32 s23, s[0:1], 0x150
; GFX12-NEXT: s_load_b32 s24, s[0:1], 0x160
; GFX12-NEXT: s_load_b32 s25, s[0:1], 0x170
; GFX12-NEXT: s_load_b32 s26, s[0:1], 0x180
; GFX12-NEXT: s_load_b32 s27, s[0:1], 0x190
; GFX12-NEXT: s_load_b32 s28, s[0:1], 0x1a0
; GFX12-NEXT: s_load_b32 s29, s[0:1], 0x1b0
; GFX12-NEXT: s_load_b32 s30, s[0:1], 0x1c0
; GFX12-NEXT: s_load_b32 s31, s[0:1], 0x1d0
; GFX12-NEXT: s_load_b32 s33, s[0:1], 0x1e0
; GFX12-NEXT: s_load_b32 s34, s[0:1], 0x1f0
; GFX12-NEXT: s_clause 0x1f
; GFX12-NEXT: s_load_b32 s35, s[0:1], 0x200
; GFX12-NEXT: s_load_b32 s36, s[0:1], 0x210
; GFX12-NEXT: s_load_b32 s37, s[0:1], 0x220
; GFX12-NEXT: s_load_b32 s38, s[0:1], 0x230
; GFX12-NEXT: s_load_b32 s39, s[0:1], 0x240
; GFX12-NEXT: s_load_b32 s40, s[0:1], 0x250
; GFX12-NEXT: s_load_b32 s41, s[0:1], 0x260
; GFX12-NEXT: s_load_b32 s42, s[0:1], 0x270
; GFX12-NEXT: s_load_b32 s43, s[0:1], 0x280
; GFX12-NEXT: s_load_b32 s44, s[0:1], 0x290
; GFX12-NEXT: s_load_b32 s45, s[0:1], 0x2a0
; GFX12-NEXT: s_load_b32 s46, s[0:1], 0x2b0
; GFX12-NEXT: s_load_b32 s47, s[0:1], 0x2c0
; GFX12-NEXT: s_load_b32 s48, s[0:1], 0x2d0
; GFX12-NEXT: s_load_b32 s49, s[0:1], 0x2e0
; GFX12-NEXT: s_load_b32 s50, s[0:1], 0x2f0
; GFX12-NEXT: s_load_b32 s51, s[0:1], 0x300
; GFX12-NEXT: s_load_b32 s52, s[0:1], 0x310
; GFX12-NEXT: s_load_b32 s53, s[0:1], 0x320
; GFX12-NEXT: s_load_b32 s54, s[0:1], 0x330
; GFX12-NEXT: s_load_b32 s55, s[0:1], 0x340
; GFX12-NEXT: s_load_b32 s56, s[0:1], 0x350
; GFX12-NEXT: s_load_b32 s57, s[0:1], 0x360
; GFX12-NEXT: s_load_b32 s58, s[0:1], 0x370
; GFX12-NEXT: s_load_b32 s59, s[0:1], 0x380
; GFX12-NEXT: s_load_b32 s60, s[0:1], 0x390
; GFX12-NEXT: s_load_b32 s61, s[0:1], 0x3a0
; GFX12-NEXT: s_load_b32 s62, s[0:1], 0x3b0
; GFX12-NEXT: s_load_b32 s63, s[0:1], 0x3c0
; GFX12-NEXT: s_load_b32 s64, s[0:1], 0x3d0
; GFX12-NEXT: s_load_b32 s65, s[0:1], 0x3e0
; GFX12-NEXT: s_load_b32 s66, s[0:1], 0x3f0
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s67, s[0:1], 0x400
; GFX12-NEXT: s_load_b32 s0, s[0:1], 0x410
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s2
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s3
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s4
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s5
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s6
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s7
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s8
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s9
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s10
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s11
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s12
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s13
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s14
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s15
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s16
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s17
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s18
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s19
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s20
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s21
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s22
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s23
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s24
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s25
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s26
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s27
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s28
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s29
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s30
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s31
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s33
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s34
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s35
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s36
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s37
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s38
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s39
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s40
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s41
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s42
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s43
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s44
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s45
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s46
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s47
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s48
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s49
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s50
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s51
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s52
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s53
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s54
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s55
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s56
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s57
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s58
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s60
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s61
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s62
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s63
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s64
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s65
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s66
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s67
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s0
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_endpgm
%v0 = load i32, ptr addrspace(1) %p
%ptr1 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 1
%v1 = load i32, ptr addrspace(1) %ptr1
%ptr2 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 2
%v2 = load i32, ptr addrspace(1) %ptr2
%ptr3 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 3
%v3 = load i32, ptr addrspace(1) %ptr3
%ptr4 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 4
%v4 = load i32, ptr addrspace(1) %ptr4
%ptr5 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 5
%v5 = load i32, ptr addrspace(1) %ptr5
%ptr6 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 6
%v6 = load i32, ptr addrspace(1) %ptr6
%ptr7 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 7
%v7 = load i32, ptr addrspace(1) %ptr7
%ptr8 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 8
%v8 = load i32, ptr addrspace(1) %ptr8
%ptr9 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 9
%v9 = load i32, ptr addrspace(1) %ptr9
%ptr10 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 10
%v10 = load i32, ptr addrspace(1) %ptr10
%ptr11 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 11
%v11 = load i32, ptr addrspace(1) %ptr11
%ptr12 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 12
%v12 = load i32, ptr addrspace(1) %ptr12
%ptr13 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 13
%v13 = load i32, ptr addrspace(1) %ptr13
%ptr14 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 14
%v14 = load i32, ptr addrspace(1) %ptr14
%ptr15 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 15
%v15 = load i32, ptr addrspace(1) %ptr15
%ptr16 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 16
%v16 = load i32, ptr addrspace(1) %ptr16
%ptr17 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 17
%v17 = load i32, ptr addrspace(1) %ptr17
%ptr18 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 18
%v18 = load i32, ptr addrspace(1) %ptr18
%ptr19 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 19
%v19 = load i32, ptr addrspace(1) %ptr19
%ptr20 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 20
%v20 = load i32, ptr addrspace(1) %ptr20
%ptr21 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 21
%v21 = load i32, ptr addrspace(1) %ptr21
%ptr22 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 22
%v22 = load i32, ptr addrspace(1) %ptr22
%ptr23 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 23
%v23 = load i32, ptr addrspace(1) %ptr23
%ptr24 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 24
%v24 = load i32, ptr addrspace(1) %ptr24
%ptr25 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 25
%v25 = load i32, ptr addrspace(1) %ptr25
%ptr26 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 26
%v26 = load i32, ptr addrspace(1) %ptr26
%ptr27 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 27
%v27 = load i32, ptr addrspace(1) %ptr27
%ptr28 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 28
%v28 = load i32, ptr addrspace(1) %ptr28
%ptr29 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 29
%v29 = load i32, ptr addrspace(1) %ptr29
%ptr30 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 30
%v30 = load i32, ptr addrspace(1) %ptr30
%ptr31 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 31
%v31 = load i32, ptr addrspace(1) %ptr31
%ptr32 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 32
%v32 = load i32, ptr addrspace(1) %ptr32
%ptr33 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 33
%v33 = load i32, ptr addrspace(1) %ptr33
%ptr34 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 34
%v34 = load i32, ptr addrspace(1) %ptr34
%ptr35 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 35
%v35 = load i32, ptr addrspace(1) %ptr35
%ptr36 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 36
%v36 = load i32, ptr addrspace(1) %ptr36
%ptr37 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 37
%v37 = load i32, ptr addrspace(1) %ptr37
%ptr38 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 38
%v38 = load i32, ptr addrspace(1) %ptr38
%ptr39 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 39
%v39 = load i32, ptr addrspace(1) %ptr39
%ptr40 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 40
%v40 = load i32, ptr addrspace(1) %ptr40
%ptr41 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 41
%v41 = load i32, ptr addrspace(1) %ptr41
%ptr42 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 42
%v42 = load i32, ptr addrspace(1) %ptr42
%ptr43 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 43
%v43 = load i32, ptr addrspace(1) %ptr43
%ptr44 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 44
%v44 = load i32, ptr addrspace(1) %ptr44
%ptr45 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 45
%v45 = load i32, ptr addrspace(1) %ptr45
%ptr46 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 46
%v46 = load i32, ptr addrspace(1) %ptr46
%ptr47 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 47
%v47 = load i32, ptr addrspace(1) %ptr47
%ptr48 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 48
%v48 = load i32, ptr addrspace(1) %ptr48
%ptr49 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 49
%v49 = load i32, ptr addrspace(1) %ptr49
%ptr50 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 50
%v50 = load i32, ptr addrspace(1) %ptr50
%ptr51 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 51
%v51 = load i32, ptr addrspace(1) %ptr51
%ptr52 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 52
%v52 = load i32, ptr addrspace(1) %ptr52
%ptr53 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 53
%v53 = load i32, ptr addrspace(1) %ptr53
%ptr54 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 54
%v54 = load i32, ptr addrspace(1) %ptr54
%ptr55 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 55
%v55 = load i32, ptr addrspace(1) %ptr55
%ptr56 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 56
%v56 = load i32, ptr addrspace(1) %ptr56
%ptr57 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 57
%v57 = load i32, ptr addrspace(1) %ptr57
%ptr58 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 58
%v58 = load i32, ptr addrspace(1) %ptr58
%ptr59 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 59
%v59 = load i32, ptr addrspace(1) %ptr59
%ptr60 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 60
%v60 = load i32, ptr addrspace(1) %ptr60
%ptr61 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 61
%v61 = load i32, ptr addrspace(1) %ptr61
%ptr62 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 62
%v62 = load i32, ptr addrspace(1) %ptr62
%ptr63 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 63
%v63 = load i32, ptr addrspace(1) %ptr63
%ptr64 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 64
%v64 = load i32, ptr addrspace(1) %ptr64
%ptr65 = getelementptr <4 x i32>, ptr addrspace(1) %p, i64 65
%v65 = load i32, ptr addrspace(1) %ptr65
call void asm sideeffect "; use $0", "s"(i32 %v0)
call void asm sideeffect "; use $0", "s"(i32 %v1)
call void asm sideeffect "; use $0", "s"(i32 %v2)
call void asm sideeffect "; use $0", "s"(i32 %v3)
call void asm sideeffect "; use $0", "s"(i32 %v4)
call void asm sideeffect "; use $0", "s"(i32 %v5)
call void asm sideeffect "; use $0", "s"(i32 %v6)
call void asm sideeffect "; use $0", "s"(i32 %v7)
call void asm sideeffect "; use $0", "s"(i32 %v8)
call void asm sideeffect "; use $0", "s"(i32 %v9)
call void asm sideeffect "; use $0", "s"(i32 %v10)
call void asm sideeffect "; use $0", "s"(i32 %v11)
call void asm sideeffect "; use $0", "s"(i32 %v12)
call void asm sideeffect "; use $0", "s"(i32 %v13)
call void asm sideeffect "; use $0", "s"(i32 %v14)
call void asm sideeffect "; use $0", "s"(i32 %v15)
call void asm sideeffect "; use $0", "s"(i32 %v16)
call void asm sideeffect "; use $0", "s"(i32 %v17)
call void asm sideeffect "; use $0", "s"(i32 %v18)
call void asm sideeffect "; use $0", "s"(i32 %v19)
call void asm sideeffect "; use $0", "s"(i32 %v20)
call void asm sideeffect "; use $0", "s"(i32 %v21)
call void asm sideeffect "; use $0", "s"(i32 %v22)
call void asm sideeffect "; use $0", "s"(i32 %v23)
call void asm sideeffect "; use $0", "s"(i32 %v24)
call void asm sideeffect "; use $0", "s"(i32 %v25)
call void asm sideeffect "; use $0", "s"(i32 %v26)
call void asm sideeffect "; use $0", "s"(i32 %v27)
call void asm sideeffect "; use $0", "s"(i32 %v28)
call void asm sideeffect "; use $0", "s"(i32 %v29)
call void asm sideeffect "; use $0", "s"(i32 %v30)
call void asm sideeffect "; use $0", "s"(i32 %v31)
call void asm sideeffect "; use $0", "s"(i32 %v32)
call void asm sideeffect "; use $0", "s"(i32 %v33)
call void asm sideeffect "; use $0", "s"(i32 %v34)
call void asm sideeffect "; use $0", "s"(i32 %v35)
call void asm sideeffect "; use $0", "s"(i32 %v36)
call void asm sideeffect "; use $0", "s"(i32 %v37)
call void asm sideeffect "; use $0", "s"(i32 %v38)
call void asm sideeffect "; use $0", "s"(i32 %v39)
call void asm sideeffect "; use $0", "s"(i32 %v40)
call void asm sideeffect "; use $0", "s"(i32 %v41)
call void asm sideeffect "; use $0", "s"(i32 %v42)
call void asm sideeffect "; use $0", "s"(i32 %v43)
call void asm sideeffect "; use $0", "s"(i32 %v44)
call void asm sideeffect "; use $0", "s"(i32 %v45)
call void asm sideeffect "; use $0", "s"(i32 %v46)
call void asm sideeffect "; use $0", "s"(i32 %v47)
call void asm sideeffect "; use $0", "s"(i32 %v48)
call void asm sideeffect "; use $0", "s"(i32 %v49)
call void asm sideeffect "; use $0", "s"(i32 %v50)
call void asm sideeffect "; use $0", "s"(i32 %v51)
call void asm sideeffect "; use $0", "s"(i32 %v52)
call void asm sideeffect "; use $0", "s"(i32 %v53)
call void asm sideeffect "; use $0", "s"(i32 %v54)
call void asm sideeffect "; use $0", "s"(i32 %v55)
call void asm sideeffect "; use $0", "s"(i32 %v56)
call void asm sideeffect "; use $0", "s"(i32 %v57)
call void asm sideeffect "; use $0", "s"(i32 %v58)
call void asm sideeffect "; use $0", "s"(i32 %v59)
call void asm sideeffect "; use $0", "s"(i32 %v60)
call void asm sideeffect "; use $0", "s"(i32 %v61)
call void asm sideeffect "; use $0", "s"(i32 %v62)
call void asm sideeffect "; use $0", "s"(i32 %v63)
call void asm sideeffect "; use $0", "s"(i32 %v64)
call void asm sideeffect "; use $0", "s"(i32 %v65)
ret void
}