; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
declare i32 @llvm.amdgcn.workitem.id.x()
define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP14]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: ret void
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_value(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP: 12:
; IR-DPP-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP14]]
; IR-DPP: 14:
; IR-DPP-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
ret void
}
define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_value(
; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 8:
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: ret void
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT: [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], -1
; IR-ITERATIVE-NEXT: [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fadd_div_value(
; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00)
; IR-DPP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63)
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]])
; IR-DPP-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP25]]
; IR-DPP: 25:
; IR-DPP-NEXT: ret void
;
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
%divValue = bitcast i32 %id.x to float
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue seq_cst
ret void
}
define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_value(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP14]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: ret void
;
; IR-DPP-LABEL: @global_atomic_fsub_uni_value(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP: 12:
; IR-DPP-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP14]]
; IR-DPP: 14:
; IR-DPP-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
ret void
}
define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_value(
; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 8:
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP14:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: ret void
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP14]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP17:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[DIVVALUE]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT: [[TMP14]] = fadd float [[ACCUMULATOR]], [[TMP13]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], -1
; IR-ITERATIVE-NEXT: [[TMP17]] = and i64 [[ACTIVEBITS]], [[TMP16]]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP18]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP19]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fsub_div_value(
; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[DIVVALUE]], float -0.000000e+00)
; IR-DPP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP7]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP9:%.*]] = fadd float [[TMP7]], [[TMP8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP19]], i32 63)
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP20]])
; IR-DPP-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP25:%.*]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP21]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP25]]
; IR-DPP: 25:
; IR-DPP-NEXT: ret void
;
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
%divValue = bitcast i32 %id.x to float
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue seq_cst
ret void
}
attributes #0 = {"target-cpu"="gfx906"}