; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 4
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx803 %s | FileCheck -check-prefixes=CHECK,GFX803 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx900 %s | FileCheck -check-prefixes=CHECK,GFX900 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx90a %s | FileCheck -check-prefixes=CHECK,GFX90A %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx1030 %s | FileCheck -check-prefixes=CHECK,GFX10 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx1100 %s | FileCheck -check-prefixes=CHECK,GFX11 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx940 %s | FileCheck -check-prefixes=CHECK,GFX940 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand -mcpu=gfx1200 %s | FileCheck -check-prefixes=CHECK,GFX12 %s
; Test that system scoped atomicrmw or 0 is transformed to add 0.
; Transform to add
define i32 @test_atomicrmw_or_0_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !foo.md [[META0:![0-9]+]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 seq_cst, !foo.md !0
ret i32 %res
}
; Transform to add
define i32 @test_atomicrmw_or_0_global_one_as(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_one_as(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 syncscope("one-as") seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 syncscope("one-as") seq_cst
ret i32 %res
}
; Transform to add
define i32 @test_atomicrmw_or_0_flat_system(ptr %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_flat_system(
; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr [[PTR]], i32 0 seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr %ptr, i32 0 seq_cst
ret i32 %res
}
; Transform to add
define i32 @test_atomicrmw_or_0_as999_system(ptr addrspace(999) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_as999_system(
; CHECK-SAME: ptr addrspace(999) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(999) [[PTR]], i32 0 seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(999) %ptr, i32 0 seq_cst
ret i32 %res
}
; Leave as-is, only system scope should be changed.
define i32 @test_atomicrmw_or_0_global_agent(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_agent(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst
ret i32 %res
}
; Leave as-is, LDS atomics aren't relevant.
define i32 @test_atomicrmw_or_0_local(ptr addrspace(3) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_local(
; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(3) [[PTR]], i32 0 seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(3) %ptr, i32 0 seq_cst
ret i32 %res
}
; Leave non-0 values alone.
define i32 @test_atomicrmw_or_1_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_1_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 1 seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 1 seq_cst
ret i32 %res
}
define i32 @test_atomicrmw_or_var_global_system(ptr addrspace(1) %ptr, i32 %val) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_var_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 [[VAL]] seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 %val seq_cst
ret i32 %res
}
; Leave as-is
define i32 @test_atomicrmw_add_0_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_add_0_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw add ptr addrspace(1) %ptr, i32 0 seq_cst
ret i32 %res
}
; Transform to add
define i32 @test_atomicrmw_sub_0_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !foo.md [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 seq_cst, !foo.md !0
ret i32 %res
}
; Transform to add
define i32 @test_atomicrmw_xor_0_global_system(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_system(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !foo.md [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 seq_cst, !foo.md !0
ret i32 %res
}
define i32 @test_atomicrmw_or_0_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_system__amdgpu_no_fine_grained_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_or_0_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_system__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_or_0_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_xor_0_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_system__amdgpu_no_fine_grained_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_xor_0_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_system__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_xor_0_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_sub_0_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_system__amdgpu_no_fine_grained_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_sub_0_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_system__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_sub_0_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr addrspace(1) [[PTR]], i32 0 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_or_0_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_agent__amdgpu_no_fine_grained_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_or_0_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_agent__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_or_0_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_or_0_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw or ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_xor_0_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_agent__amdgpu_no_fine_grained_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_xor_0_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_agent__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_xor_0_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_xor_0_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw xor ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw xor ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_fine_grained_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
ret i32 %res
}
define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i32 @test_atomicrmw_sub_0_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RES:%.*]] = atomicrmw sub ptr addrspace(1) [[PTR]], i32 0 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
; CHECK-NEXT: ret i32 [[RES]]
;
%res = atomicrmw sub ptr addrspace(1) %ptr, i32 0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
ret i32 %res
}
!0 = !{}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
; GFX12: {{.*}}
; GFX803: {{.*}}
; GFX900: {{.*}}
; GFX90A: {{.*}}
; GFX940: {{.*}}