; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-GISEL %s
; --------------------------------------------------------------------------------
; fminimum tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(minimum(a, b)) with a single use of the minimum.
; a and b are read with volatile loads at an index derived from the
; workitem id; the negated result is stored to %out at the same index.
define void @v_fneg_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %b.val = load volatile float, ptr addrspace(1) %b.addr
  %minimum = call float @llvm.minimum.f32(float %a.val, float %b.val)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_self_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(minimum(a, a)): the same volatile-loaded value feeds both operands
; of the minimum, and the negated result is stored to %out.
define void @v_fneg_self_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %minimum = call float @llvm.minimum.f32(float %a.val, float %a.val)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(minimum(a, 4.0)): minimum against a positive constant operand,
; followed by a negation whose result is stored to %out.
define void @v_fneg_posk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %minimum = call float @llvm.minimum.f32(float %a.val, float 4.0)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(minimum(a, -4.0)): minimum against a negative constant operand,
; followed by a negation whose result is stored to %out.
define void @v_fneg_negk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %minimum = call float @llvm.minimum.f32(float %a.val, float -4.0)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], [[A]], 0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(minimum(a, 0.0)): minimum against positive zero, then negated and
; stored to %out.
; NOTE(review): the checks preceding this function match only the
; v_minimum_f32 and the store; no pattern pins how the negation is applied.
; Confirm that is intended.
define void @v_fneg_0_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %minimum = call float @llvm.minimum.f32(float %a.val, float 0.0)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_minimum_foldable_use_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[MIN:v[0-9]+]], [[A]], 0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(minimum(a, 0.0)) whose only user is an fmul by b, so the negation
; feeds a multiply operand instead of being stored directly.
define void @v_fneg_0_minimum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %b.val = load volatile float, ptr addrspace(1) %b.addr
  %minimum = call float @llvm.minimum.f32(float %a.val, float 0.0)
  %neg = fneg float %minimum
  %product = fmul float %neg, %b.val
  store float %product, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_minimum_multi_use_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MAX0]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
; fneg(minimum(a, b)) where the un-negated minimum has a second user
; (an fmul by 4.0). Both results are written with volatile stores directly
; to %out, negated value first and product second, so no per-workitem
; output address is computed here.
define void @v_fneg_minimum_multi_use_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minimum.f32(float %a, float %b)
  ; First use: the negation under test.
  %fneg = fneg float %min
  ; Second use: consumes the un-negated minimum.
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
; --------------------------------------------------------------------------------
; fmaximum tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(maximum(a, b)) with a single use of the maximum.
; a and b are read with volatile loads at an index derived from the
; workitem id; the negated result is stored to %out at the same index.
define void @v_fneg_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %b.val = load volatile float, ptr addrspace(1) %b.addr
  %max = call float @llvm.maximum.f32(float %a.val, float %b.val)
  %neg = fneg float %max
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_self_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(maximum(a, a)): the same volatile-loaded value feeds both operands
; of the maximum, and the negated result is stored to %out.
define void @v_fneg_self_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %max = call float @llvm.maximum.f32(float %a.val, float %a.val)
  %neg = fneg float %max
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_posk_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(maximum(a, 4.0)): maximum against a positive constant operand,
; followed by a negation whose result is stored to %out.
define void @v_fneg_posk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %max = call float @llvm.maximum.f32(float %a.val, float 4.0)
  %neg = fneg float %max
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_negk_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(maximum(a, -4.0)): maximum against a negative constant operand,
; followed by a negation whose result is stored to %out.
define void @v_fneg_negk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %max = call float @llvm.maximum.f32(float %a.val, float -4.0)
  %neg = fneg float %max
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], [[A]], 0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(maximum(a, 0.0)): maximum against positive zero, then negated and
; stored to %out.
; NOTE(review): the checks preceding this function match only the
; v_maximum_f32 and the store; no pattern pins how the negation is applied.
; Confirm that is intended.
define void @v_fneg_0_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %maximum = call float @llvm.maximum.f32(float %a.val, float 0.0)
  %neg = fneg float %maximum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_0_maximum_foldable_use_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[MAX:v[0-9]+]], [[A]], 0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
; fneg(maximum(a, 0.0)) whose only user is an fmul by b, so the negation
; feeds a multiply operand instead of being stored directly.
define void @v_fneg_0_maximum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %idx = sext i32 %id to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %idx
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %idx
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %a.val = load volatile float, ptr addrspace(1) %a.addr
  %b.val = load volatile float, ptr addrspace(1) %b.addr
  %maximum = call float @llvm.maximum.f32(float %a.val, float 0.0)
  %neg = fneg float %maximum
  %product = fmul float %neg, %b.val
  store float %product, ptr addrspace(1) %out.addr
  ret void
}
; GCN-LABEL: {{^}}v_fneg_maximum_multi_use_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[MIN0:v[0-9]+]], -[[A]], -[[B]]
; GCN-SDAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MIN0]]
; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MIN0]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MIN0]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
; fneg(maximum(a, b)) where the un-negated maximum has a second user
; (an fmul by 4.0). Both results are written with volatile stores directly
; to %out, negated value first and product second, so no per-workitem
; output address is computed here.
define void @v_fneg_maximum_multi_use_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %max = call float @llvm.maximum.f32(float %a, float %b)
  ; First use: the negation under test.
  %fneg = fneg float %max
  ; Second use: consumes the un-negated maximum.
  %use1 = fmul float %max, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}
; Intrinsics called by the test functions above: the workitem id query and
; the f32 minimum/maximum operations under test.
declare i32 @llvm.amdgcn.workitem.id.x()
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)