; llvm/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GFX12 %s

; Checks selection of llvm.amdgcn.image.atomic.swap.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7, use s[0..7] as the
; resource operand and set dmask 0x1. The pre-GFX12 checks expect glc, the
; GFX12 checks expect th TH_ATOMIC_RETURN instead, the GFX10PLUS and GFX12
; checks expect a dim SQ_RSRC_IMG_1D operand, and the GFX90A checks expect v1
; to be copied into v2 before the atomic.
define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_swap_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_swap_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_swap_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_swap_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_swap v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_swap_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_swap_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.add.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7 and use s[0..7] as the
; resource operand with dmask 0x1. The pre-GFX12 checks expect image_atomic_add
; with glc, while the GFX12 checks expect the image_atomic_add_uint mnemonic
; with th TH_ATOMIC_RETURN. The GFX90A checks expect v1 to be copied into v2
; before the atomic.
define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_add_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.sub.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7 and use s[0..7] as the
; resource operand with dmask 0x1. The pre-GFX12 checks expect image_atomic_sub
; with glc, while the GFX12 checks expect the image_atomic_sub_uint mnemonic
; with th TH_ATOMIC_RETURN. The GFX90A checks expect v1 to be copied into v2
; before the atomic.
define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_sub_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_sub_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_sub_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_sub_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_sub v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_sub_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_sub_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.smin.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7 and use s[0..7] as the
; resource operand with dmask 0x1. The pre-GFX12 checks expect
; image_atomic_smin with glc, while the GFX12 checks expect the
; image_atomic_min_int mnemonic with th TH_ATOMIC_RETURN. The GFX90A checks
; expect v1 to be copied into v2 before the atomic.
define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_smin_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_smin_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_smin_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_smin_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_smin v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_smin_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_smin_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.umin.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7 and use s[0..7] as the
; resource operand with dmask 0x1. The pre-GFX12 checks expect
; image_atomic_umin with glc, while the GFX12 checks expect the
; image_atomic_min_uint mnemonic with th TH_ATOMIC_RETURN. The GFX90A checks
; expect v1 to be copied into v2 before the atomic.
define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_umin_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_umin_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_umin_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_umin_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_umin v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_umin_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_umin_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.smax.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7 and use s[0..7] as the
; resource operand with dmask 0x1. The pre-GFX12 checks expect
; image_atomic_smax with glc, while the GFX12 checks expect the
; image_atomic_max_int mnemonic with th TH_ATOMIC_RETURN. The GFX90A checks
; expect v1 to be copied into v2 before the atomic.
define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_smax_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_smax_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_smax_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_smax_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_smax v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_smax_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_smax_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.umax.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7 and use s[0..7] as the
; resource operand with dmask 0x1. The pre-GFX12 checks expect
; image_atomic_umax with glc, while the GFX12 checks expect the
; image_atomic_max_uint mnemonic with th TH_ATOMIC_RETURN. The GFX90A checks
; expect v1 to be copied into v2 before the atomic.
define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_umax_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_umax_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_umax_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_umax_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_umax v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_umax_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_umax_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.and.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7, use s[0..7] as the
; resource operand and emit image_atomic_and with dmask 0x1. The pre-GFX12
; checks expect glc, the GFX12 checks keep the same mnemonic but expect
; th TH_ATOMIC_RETURN instead, and the GFX90A checks expect v1 to be copied
; into v2 before the atomic.
define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_and_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_and_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_and_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_and_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_and v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_and_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_and_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Checks selection of llvm.amdgcn.image.atomic.or.1d with an i32 data value
; and an i32 %s operand (presumably the 1D coordinate, confirm against the
; intrinsic definition). The intrinsic result is bitcast to float and returned.
; Every target is expected to move s2..s9 into s0..s7, use s[0..7] as the
; resource operand and emit image_atomic_or with dmask 0x1. The pre-GFX12
; checks expect glc, the GFX12 checks keep the same mnemonic but expect
; th TH_ATOMIC_RETURN instead, and the GFX90A checks expect v1 to be copied
; into v2 before the atomic.
define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_or_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_or_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_or_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_or_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_or v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_or_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_or_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 1D image atomic XOR on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.xor.1d is bitcast to float and returned.
; For every RUN configuration the checks expect eight s_mov_b32 copies
; (s2..s9 into s0..s7), one image_atomic_xor reading s[0:7] with dmask:0x1,
; and a wait before the return. The gfx90a checks also copy v1 to v2 and use
; v2 as the second VGPR operand; gfx10+ adds dim:SQ_RSRC_IMG_1D; gfx1200
; prints th:TH_ATOMIC_RETURN where the older targets print glc.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_xor_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_xor_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_xor_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_xor_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_xor v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_xor_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_xor_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 1D image atomic INC on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.inc.1d is bitcast to float and returned.
; The checks expect one image_atomic_inc per target (reading s[0:7] after
; the s2..s9 -> s0..s7 copies); on gfx1200 the expected mnemonic is
; image_atomic_inc_uint and the modifier is th:TH_ATOMIC_RETURN rather than
; glc. The gfx90a checks additionally copy v1 to v2 before the instruction.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_inc_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_inc_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_inc_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_inc_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_inc v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_inc_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_inc_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 1D image atomic DEC on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.dec.1d is bitcast to float and returned.
; The checks expect one image_atomic_dec per target (reading s[0:7] after
; the s2..s9 -> s0..s7 copies); on gfx1200 the expected mnemonic is
; image_atomic_dec_uint and the modifier is th:TH_ATOMIC_RETURN rather than
; glc. The gfx90a checks additionally copy v1 to v2 before the instruction.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_dec_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_dec_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_dec_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_dec_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_dec v0, v2, s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_dec_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_dec_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 1D image atomic compare-and-swap on i32 with the result used: the i32
; produced by llvm.amdgcn.image.atomic.cmpswap.1d (called with %cmp, %swap,
; the coordinate %s and %rsrc) is bitcast to float and returned.
; Unlike the single-operand atomics in this file, the expected instruction
; takes a two-register data operand v[0:1], uses v2 as the second VGPR
; operand and carries dmask:0x3. The gfx90a checks here contain no extra
; v_mov_b32 copy. gfx1200 prints th:TH_ATOMIC_RETURN where older targets
; print glc.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
; GFX6-LABEL: atomic_cmpswap_i32_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_cmpswap_i32_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_cmpswap_i32_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_cmpswap_i32_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_cmpswap_i32_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Same compare-and-swap call as atomic_cmpswap_i32_1d, but the shader returns
; void and the intrinsic result %v is never used.
; The checks expect the same image_atomic_cmpswap v[0:1], v2, s[0:7] with
; dmask:0x3, followed directly by s_endpgm; no wait instruction is expected
; between them on any target.
; NOTE(review): the expected instruction still carries glc (and
; th:TH_ATOMIC_RETURN on gfx1200) although the result is unused, so presumably
; the returning form is still selected here; confirm whether a dedicated
; no-return selection is intended before relying on this output.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
; GFX6-LABEL: atomic_cmpswap_i32_1d_no_return:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_endpgm
;
; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_endpgm
;
; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_endpgm
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 2D image atomic ADD on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.add.2d (coordinates %s and %t) is bitcast to float
; and returned.
; The checks expect one image_atomic_add whose second operand is the
; two-register range v[1:2]. The gfx90a checks first copy v1/v2 into v4/v5 and
; use v[4:5]; gfx10+ adds dim:SQ_RSRC_IMG_2D; gfx1200 expects
; image_atomic_add_uint with the operand written as the list [v1, v2] and
; th:TH_ATOMIC_RETURN in place of glc.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
; GFX6-LABEL: atomic_add_i32_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_2d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_2d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_2d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 3D image atomic ADD on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.add.3d (coordinates %s, %t and %r) is bitcast to
; float and returned.
; The checks expect one image_atomic_add whose second operand is the
; three-register range v[1:3]. The gfx90a checks first copy v1..v3 into v4..v6
; and use v[4:6]; gfx10+ adds dim:SQ_RSRC_IMG_3D; gfx1200 expects
; image_atomic_add_uint with the operand written as [v1, v2, v3] and
; th:TH_ATOMIC_RETURN in place of glc.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
; GFX6-LABEL: atomic_add_i32_3d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_3d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_3d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_3d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_3d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_3d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Cube image atomic ADD on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.add.cube (operands %s, %t and %face) is bitcast to
; float and returned.
; The checks expect one image_atomic_add whose second operand is v[1:3]
; (v[4:6] after three v_mov_b32 copies on gfx90a). On the targets older than
; gfx10 the instruction carries the extra `da` modifier, which the 3D variant
; in this file does not; gfx10+ expresses the dimension as
; dim:SQ_RSRC_IMG_CUBE instead. gfx1200 expects image_atomic_add_uint with
; [v1, v2, v3] and th:TH_ATOMIC_RETURN in place of glc.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
; GFX6-LABEL: atomic_add_i32_cube:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_cube:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_cube:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_cube:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_cube:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_cube:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; 1D-array image atomic ADD on i32 with the result used: the i32 produced by
; llvm.amdgcn.image.atomic.add.1darray (operands %s and %slice) is bitcast to
; float and returned.
; The checks expect one image_atomic_add whose second operand is v[1:2]
; (v[4:5] after two v_mov_b32 copies on gfx90a). On the targets older than
; gfx10 the instruction carries the extra `da` modifier, which the plain 2D
; variant in this file does not; gfx10+ expresses the dimension as
; dim:SQ_RSRC_IMG_1D_ARRAY instead. gfx1200 expects image_atomic_add_uint with
; [v1, v2] and th:TH_ATOMIC_RETURN in place of glc.
; The assertions below are autogenerated; regenerate them instead of editing.
define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
; GFX6-LABEL: atomic_add_i32_1darray:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_1darray:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_1darray:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_1darray:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    image_atomic_add v0, v[4:5], s[0:7] dmask:0x1 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_1darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_1darray:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Codegen test for @llvm.amdgcn.image.atomic.add.2darray.i32.i32 with i32 data
; and three i32 coordinates (%s, %t, %slice). The descriptor %rsrc is passed
; inreg and every run expects it to be moved from s2-s9 down to s0-s7 before a
; single image_atomic_add (image_atomic_add_uint on gfx1200) that takes a
; three-register address. The tahiti, fiji, gfx900 and gfx90a checks carry the
; `da` modifier, whereas the gfx1010/gfx1100 and gfx1200 checks name the
; SQ_RSRC_IMG_2D_ARRAY dimension instead. The gfx90a checks additionally expect
; the coordinates to be copied from v1-v3 into v4-v6 ahead of the atomic.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
; GFX6-LABEL: atomic_add_i32_2darray:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_2darray:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_2darray:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_2darray:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_2darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_2darray:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i32 atomic result as float to match the declared return type.
  %out = bitcast i32 %v to float
  ret float %out
}

; Codegen test for @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32 with i32 data
; and three i32 coordinates (%s, %t, %fragid). Every run expects the inreg
; descriptor to be moved from s2-s9 into s0-s7, followed by one
; image_atomic_add (image_atomic_add_uint on gfx1200) with a three-register
; address. Unlike the array variants in this file, the tahiti, fiji, gfx900
; and gfx90a checks here have no `da` modifier; the gfx1010/gfx1100 and
; gfx1200 checks name the SQ_RSRC_IMG_2D_MSAA dimension. The gfx90a checks
; expect the coordinates to be copied from v1-v3 into v4-v6 first.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
; GFX6-LABEL: atomic_add_i32_2dmsaa:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_2dmsaa:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_2dmsaa:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_2dmsaa:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    v_mov_b32_e32 v6, v3
; GFX90A-NEXT:    image_atomic_add v0, v[4:6], s[0:7] dmask:0x1 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_2dmsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_2dmsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i32 atomic result as float to match the declared return type.
  %out = bitcast i32 %v to float
  ret float %out
}

; Codegen test for @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32 with i32
; data and four i32 coordinates (%s, %t, %slice, %fragid). Every run expects
; the inreg descriptor to be moved from s2-s9 into s0-s7, followed by one
; image_atomic_add (image_atomic_add_uint on gfx1200) with a four-register
; address. The tahiti, fiji, gfx900 and gfx90a checks carry the `da` modifier,
; whereas the gfx1010/gfx1100 and gfx1200 checks name the
; SQ_RSRC_IMG_2D_MSAA_ARRAY dimension. The gfx90a checks expect the
; coordinates to be copied from v1-v4 into v6-v9 before the atomic.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: atomic_add_i32_2darraymsaa:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_2darraymsaa:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_2darraymsaa:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_2darraymsaa:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v6, v1
; GFX90A-NEXT:    v_mov_b32_e32 v7, v2
; GFX90A-NEXT:    v_mov_b32_e32 v8, v3
; GFX90A-NEXT:    v_mov_b32_e32 v9, v4
; GFX90A-NEXT:    image_atomic_add v0, v[6:9], s[0:7] dmask:0x1 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_2darraymsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_2darraymsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i32 atomic result as float to match the declared return type.
  %out = bitcast i32 %v to float
  ret float %out
}

; Codegen test for @llvm.amdgcn.image.atomic.add.1d.i32.i32 where the final
; i32 operand of the call is 2 rather than 0. With that operand set, the
; tahiti, fiji, gfx900, gfx90a and gfx1010/gfx1100 checks expect `slc` in
; addition to `glc` on the image_atomic_add, and the gfx1200 checks expect
; th:TH_ATOMIC_NT_RETURN on image_atomic_add_uint. The gfx90a checks also
; expect the single coordinate to be copied from v1 into v2 before the atomic.
; NOTE(review) the final operand is presumably the cachepolicy bitmask with
; bit 1 meaning slc; confirm against the intrinsic definition.
define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
; GFX6-LABEL: atomic_add_i32_1d_slc:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i32_1d_slc:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i32_1d_slc:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i32_1d_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
; GFX90A-NEXT:    image_atomic_add v0, v2, s[0:7] dmask:0x1 unorm glc slc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i32_1d_slc:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i32_1d_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ; Reinterpret the i32 atomic result as float to match the declared return type.
  %out = bitcast i32 %v to float
  ret float %out
}

; Codegen test for @llvm.amdgcn.image.atomic.swap.1d.i64.i32, the 64-bit data
; form of the 1D image atomic swap. Every run expects the inreg descriptor to
; be moved from s2-s9 into s0-s7 and then one image_atomic_swap whose data
; operand is the register pair v[0:1] with dmask:0x3 and whose address is v2.
; The mnemonic is image_atomic_swap on all runs, gfx1200 included, and no
; run expects extra VGPR copies.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_swap_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_swap_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_swap_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_swap_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_swap_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_swap_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 atomic result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Codegen test for @llvm.amdgcn.image.atomic.add.1d.i64.i32, the 64-bit data
; form of the 1D image atomic add. Every run expects the inreg descriptor to
; be moved from s2-s9 into s0-s7 and then one atomic whose data operand is the
; register pair v[0:1] with dmask:0x3 and whose address is v2. The expected
; mnemonic is image_atomic_add on every run except gfx1200, where it is
; image_atomic_add_uint. No run expects extra VGPR copies.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_add_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 atomic result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Codegen test for @llvm.amdgcn.image.atomic.sub.1d.i64.i32, the 64-bit data
; form of the 1D image atomic subtract. Every run expects the inreg descriptor
; to be moved from s2-s9 into s0-s7 and then one atomic whose data operand is
; the register pair v[0:1] with dmask:0x3 and whose address is v2. The
; expected mnemonic is image_atomic_sub on every run except gfx1200, where it
; is image_atomic_sub_uint. No run expects extra VGPR copies.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_sub_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_sub_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_sub_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_sub_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_sub_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_sub_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_sub_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 atomic result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Codegen test for @llvm.amdgcn.image.atomic.smin.1d.i64.i32, the 64-bit data
; form of the 1D image atomic signed minimum. Every run expects the inreg
; descriptor to be moved from s2-s9 into s0-s7 and then one atomic whose data
; operand is the register pair v[0:1] with dmask:0x3 and whose address is v2.
; The expected mnemonic is image_atomic_smin on every run except gfx1200,
; where it is image_atomic_min_int. No run expects extra VGPR copies.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_smin_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_smin_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_smin_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_smin_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_smin_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_smin_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_min_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 atomic result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Codegen test for @llvm.amdgcn.image.atomic.umin.1d.i64.i32, the 64-bit data
; form of the 1D image atomic unsigned minimum. Every run expects the inreg
; descriptor to be moved from s2-s9 into s0-s7 and then one atomic whose data
; operand is the register pair v[0:1] with dmask:0x3 and whose address is v2.
; The expected mnemonic is image_atomic_umin on every run except gfx1200,
; where it is image_atomic_min_uint. No run expects extra VGPR copies.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_umin_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_umin_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_umin_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_umin_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_umin_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_umin_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_min_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 atomic result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Codegen test for @llvm.amdgcn.image.atomic.smax.1d.i64.i32, the 64-bit data
; form of the 1D image atomic signed maximum. Every run expects the inreg
; descriptor to be moved from s2-s9 into s0-s7 and then one atomic whose data
; operand is the register pair v[0:1] with dmask:0x3 and whose address is v2.
; The expected mnemonic is image_atomic_smax on every run except gfx1200,
; where it is image_atomic_max_int. No run expects extra VGPR copies.
; NOTE(review) the two trailing `i32 0` operands of the call are presumably
; texfailctrl and cachepolicy; confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_smax_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_smax_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_smax_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_smax_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_smax_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_smax_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_max_int v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 atomic result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit umax image atomic on a 1D image whose result is used.
; Expected selection on every target: the descriptor is copied from s2-s9 into
; s[0:7], the i64 data/result lives in v[0:1], the coordinate is v2, and the
; instruction carries dmask:0x3. Targets before gfx12 emit image_atomic_umax
; with glc; gfx12 emits image_atomic_max_uint with th:TH_ATOMIC_RETURN and
; waits with s_wait_loadcnt instead of s_waitcnt.
define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_umax_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_umax_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_umax_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_umax_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_umax_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_umax_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_max_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit bitwise-and image atomic on a 1D image whose result is used.
; Expected selection on every target: image_atomic_and with dmask:0x3, the i64
; data/result in v[0:1], the coordinate in v2, and the descriptor copied from
; s2-s9 into s[0:7]. Targets before gfx12 mark the instruction glc; gfx12 uses
; th:TH_ATOMIC_RETURN and waits with s_wait_loadcnt.
define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_and_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_and_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_and_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_and_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_and_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_and_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit bitwise-or image atomic on a 1D image whose result is used.
; Expected selection on every target: image_atomic_or with dmask:0x3, the i64
; data/result in v[0:1], the coordinate in v2, and the descriptor copied from
; s2-s9 into s[0:7]. Targets before gfx12 mark the instruction glc; gfx12 uses
; th:TH_ATOMIC_RETURN and waits with s_wait_loadcnt.
define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_or_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_or_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_or_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_or_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_or_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_or_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit bitwise-xor image atomic on a 1D image whose result is used.
; Expected selection on every target: image_atomic_xor with dmask:0x3, the i64
; data/result in v[0:1], the coordinate in v2, and the descriptor copied from
; s2-s9 into s[0:7]. Targets before gfx12 mark the instruction glc; gfx12 uses
; th:TH_ATOMIC_RETURN and waits with s_wait_loadcnt.
define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_xor_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_xor_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_xor_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_xor_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_xor_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_xor_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit inc image atomic on a 1D image whose result is used.
; Expected selection on every target: dmask:0x3, the i64 data/result in
; v[0:1], the coordinate in v2, and the descriptor copied from s2-s9 into
; s[0:7]. Targets before gfx12 emit image_atomic_inc with glc; gfx12 emits
; image_atomic_inc_uint with th:TH_ATOMIC_RETURN and waits with s_wait_loadcnt.
define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_inc_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_inc_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_inc_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_inc_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_inc_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_inc_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_inc_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit dec image atomic on a 1D image whose result is used.
; Expected selection on every target: dmask:0x3, the i64 data/result in
; v[0:1], the coordinate in v2, and the descriptor copied from s2-s9 into
; s[0:7]. Targets before gfx12 emit image_atomic_dec with glc; gfx12 emits
; image_atomic_dec_uint with th:TH_ATOMIC_RETURN and waits with s_wait_loadcnt.
define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_dec_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_dec_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_dec_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_dec_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_dec_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_dec_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_dec_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; 64-bit compare-and-swap image atomic on a 1D image whose result is used.
; With two i64 operands (%cmp and %swap) the expected instruction widens to
; dmask:0xf with a four-register data operand v[0:3]; the coordinate moves to
; v4 and the descriptor is copied from s2-s9 into s[0:7]. Every target emits
; image_atomic_cmpswap: glc before gfx12, th:TH_ATOMIC_RETURN plus
; s_wait_loadcnt on gfx12.
define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
; GFX6-LABEL: atomic_cmpswap_i64_1d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_cmpswap_i64_1d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_cmpswap_i64_1d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_cmpswap_i64_1d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_cmpswap_i64_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Same 64-bit compare-and-swap as atomic_cmpswap_i64_1d, but the function
; returns void and the intrinsic result %v is never used.
; Expected output differs from the returning test only in the tail: the
; program ends with s_endpgm and no wait instruction follows the atomic.
; The expected instruction itself still carries the returning encoding (glc
; before gfx12, th:TH_ATOMIC_RETURN on gfx12).
; NOTE(review): presumably the no-return form is simply not selected on this
; path yet; confirm before treating these checks as the desired end state.
define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
; GFX6-LABEL: atomic_cmpswap_i64_1d_no_return:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX6-NEXT:    s_endpgm
;
; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX8-NEXT:    s_endpgm
;
; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX900-NEXT:    s_endpgm
;
; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
; GFX90A-NEXT:    s_endpgm
;
; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
; GFX10PLUS-NEXT:    s_endpgm
;
; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_endpgm
main_body:
  ; %v is intentionally left unused: this test covers the discarded-result case.
  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; 64-bit add image atomic on a 2D image whose result is used.
; The second coordinate (%t) widens the expected address operand: targets
; before gfx12 take it as the register pair v[2:3], while gfx12 lists the
; registers individually as [v2, v3]. The i64 data/result stays in v[0:1] with
; dmask:0x3 and the descriptor is copied from s2-s9 into s[0:7]. gfx10+ and
; gfx12 additionally expect dim:SQ_RSRC_IMG_2D; gfx12 emits
; image_atomic_add_uint with th:TH_ATOMIC_RETURN instead of image_atomic_add
; with glc.
define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t) {
; GFX6-LABEL: atomic_add_i64_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_2d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_2d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_2d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_2d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ; Reinterpret the i64 result as <2 x float> to match the declared return type.
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a 3D image, addressed by the
; three i32 coordinates %s, %t and %r. The i64 returned by the intrinsic is
; bitcast to <2 x float> and returned from the shader.
; The checks expect a 64-bit data register pair (v[0:1], dmask:0x3) and three
; address VGPRs (v2..v4); gfx10 and later additionally print
; dim:SQ_RSRC_IMG_3D. Both trailing immediate operands of the call are 0.
define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %r) {
; GFX6-LABEL: atomic_add_i64_3d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_3d:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_3d:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_3d:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_3d:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_3d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a cube image, addressed by the
; i32 coordinates %s, %t and %face. The i64 returned by the intrinsic is
; bitcast to <2 x float> and returned from the shader.
; The checks expect a 64-bit data register pair (v[0:1], dmask:0x3) and three
; address VGPRs. Targets before gfx10 are expected to print the `da` modifier;
; gfx10 and later print dim:SQ_RSRC_IMG_CUBE instead.
define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %face) {
; GFX6-LABEL: atomic_add_i64_cube:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_cube:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_cube:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_cube:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_cube:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_cube:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a 1D array image, addressed by
; the i32 coordinate %s and the i32 %slice. The i64 returned by the intrinsic
; is bitcast to <2 x float> and returned from the shader.
; The checks expect a 64-bit data register pair (v[0:1], dmask:0x3) and two
; address VGPRs. Targets before gfx10 are expected to print the `da` modifier;
; gfx10 and later print dim:SQ_RSRC_IMG_1D_ARRAY instead.
define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %slice) {
; GFX6-LABEL: atomic_add_i64_1darray:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_1darray:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_1darray:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_1darray:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_1darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_1darray:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a 2D array image, addressed by
; the i32 coordinates %s, %t and the i32 %slice. The i64 returned by the
; intrinsic is bitcast to <2 x float> and returned from the shader.
; The checks expect a 64-bit data register pair (v[0:1], dmask:0x3) and three
; address VGPRs. Targets before gfx10 are expected to print the `da` modifier;
; gfx10 and later print dim:SQ_RSRC_IMG_2D_ARRAY instead.
define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice) {
; GFX6-LABEL: atomic_add_i64_2darray:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_2darray:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_2darray:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_2darray:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_2darray:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_2darray:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a 2D multisampled image,
; addressed by the i32 coordinates %s, %t and the i32 %fragid. The i64
; returned by the intrinsic is bitcast to <2 x float> and returned from the
; shader.
; The checks expect a 64-bit data register pair (v[0:1], dmask:0x3) and three
; address VGPRs, with no `da` modifier on targets before gfx10; gfx10 and
; later print dim:SQ_RSRC_IMG_2D_MSAA.
define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %fragid) {
; GFX6-LABEL: atomic_add_i64_2dmsaa:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_2dmsaa:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_2dmsaa:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_2dmsaa:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 unorm glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_2dmsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_2dmsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a 2D multisampled array image,
; addressed by the i32 coordinates %s, %t, the i32 %slice and the i32 %fragid.
; The i64 returned by the intrinsic is bitcast to <2 x float> and returned
; from the shader.
; The checks expect a 64-bit data register pair (v[0:1], dmask:0x3) and four
; address VGPRs (v2..v5). Targets before gfx10 are expected to print the `da`
; modifier; gfx10 and later print dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead.
define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: atomic_add_i64_2darraymsaa:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_2darraymsaa:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_2darraymsaa:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_2darraymsaa:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 unorm glc da
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_2darraymsaa:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v[2:5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_2darraymsaa:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], [v2, v3, v4, v5], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Image atomic add with an i64 data operand on a 1D image (single i32
; coordinate %s), called with the last immediate operand set to 2 instead of
; 0. The i64 returned by the intrinsic is bitcast to <2 x float> and returned
; from the shader.
; With that immediate the checks expect `slc` next to `glc` on every target
; before gfx12, and th:TH_ATOMIC_NT_RETURN on gfx12.
; NOTE(review): the last immediate is presumably the cache-policy operand with
; bit 1 meaning slc - confirm against the intrinsic definition.
define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
; GFX6-LABEL: atomic_add_i64_1d_slc:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: atomic_add_i64_1d_slc:
; GFX8:       ; %bb.0: ; %main_body
; GFX8-NEXT:    s_mov_b32 s0, s2
; GFX8-NEXT:    s_mov_b32 s1, s3
; GFX8-NEXT:    s_mov_b32 s2, s4
; GFX8-NEXT:    s_mov_b32 s3, s5
; GFX8-NEXT:    s_mov_b32 s4, s6
; GFX8-NEXT:    s_mov_b32 s5, s7
; GFX8-NEXT:    s_mov_b32 s6, s8
; GFX8-NEXT:    s_mov_b32 s7, s9
; GFX8-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: atomic_add_i64_1d_slc:
; GFX900:       ; %bb.0: ; %main_body
; GFX900-NEXT:    s_mov_b32 s0, s2
; GFX900-NEXT:    s_mov_b32 s1, s3
; GFX900-NEXT:    s_mov_b32 s2, s4
; GFX900-NEXT:    s_mov_b32 s3, s5
; GFX900-NEXT:    s_mov_b32 s4, s6
; GFX900-NEXT:    s_mov_b32 s5, s7
; GFX900-NEXT:    s_mov_b32 s6, s8
; GFX900-NEXT:    s_mov_b32 s7, s9
; GFX900-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: atomic_add_i64_1d_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_mov_b32 s0, s2
; GFX90A-NEXT:    s_mov_b32 s1, s3
; GFX90A-NEXT:    s_mov_b32 s2, s4
; GFX90A-NEXT:    s_mov_b32 s3, s5
; GFX90A-NEXT:    s_mov_b32 s4, s6
; GFX90A-NEXT:    s_mov_b32 s5, s7
; GFX90A-NEXT:    s_mov_b32 s6, s8
; GFX90A-NEXT:    s_mov_b32 s7, s9
; GFX90A-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: atomic_add_i64_1d_slc:
; GFX10PLUS:       ; %bb.0: ; %main_body
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: atomic_add_i64_1d_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_atomic_add_uint v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

; Declarations of the image atomic intrinsics that take and return i32 data.
; As declared here, the operands are: the i32 data value (cmpswap carries one
; additional leading i32), one i32 per image coordinate (1 for 1d; 2 for 2d
; and 1darray; 3 for 3d, cube, 2darray and 2dmsaa; 4 for 2darraymsaa), the
; <8 x i32> resource descriptor, and two trailing immediate i32 operands.
; NOTE(review): the two immediates are presumably texfailctrl and cachepolicy
; - confirm against IntrinsicsAMDGPU.td.
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Declarations of the image atomic intrinsics that take and return i64 data.
; As declared here, the operands are: the i64 data value (cmpswap takes two
; i64 values), one i32 per image coordinate (1 for 1d; 2 for 2d and 1darray;
; 3 for 3d, cube, 2darray and 2dmsaa; 4 for 2darraymsaa), the <8 x i32>
; resource descriptor, and two trailing immediate i32 operands.
; NOTE(review): the two immediates are presumably texfailctrl and cachepolicy
; - confirm against IntrinsicsAMDGPU.td.
declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Attribute group #0, referenced by the intrinsic declarations in this file:
; marks them nounwind.
attributes #0 = { nounwind }