; llvm/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll

; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -start-before=amdgpu-unify-divergent-exit-nodes --verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-GISEL %s

; --------------------------------------------------------------------------------
; fminimum tests
; --------------------------------------------------------------------------------

; fneg(minimum(a, b)) on two loaded values. The checks below expect a single
; v_maximum_f32 whose two sources are the negated loads, with that result
; being the value that is stored.
; GCN-LABEL: {{^}}v_fneg_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; All three buffers are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  ; Volatile loads of both operands, in a-then-b order.
  %a = load volatile float, ptr addrspace(1) %a.addr
  %b = load volatile float, ptr addrspace(1) %b.addr
  ; Pattern under test: the negation applied to the minimum result.
  %minimum = call float @llvm.minimum.f32(float %a, float %b)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, a)): both operands of the minimum are the same loaded value.
; The checks below expect v_maximum_f32 with the negated load used for both
; sources, and that result stored.
; GCN-LABEL: {{^}}v_fneg_self_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_self_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  %a = load volatile float, ptr addrspace(1) %a.addr
  ; Same value on both sides of the minimum, then negated.
  %minimum = call float @llvm.minimum.f32(float %a, float %a)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, 4.0)): one operand is a positive constant. The checks below
; expect v_maximum_f32 on the negated load with the constant appearing as
; -4.0, and that result stored.
; GCN-LABEL: {{^}}v_fneg_posk_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_posk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  %a = load volatile float, ptr addrspace(1) %a.addr
  ; Minimum against the constant +4.0, then negated.
  %minimum = call float @llvm.minimum.f32(float %a, float 4.0)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, -4.0)): one operand is a negative constant. The checks below
; expect v_maximum_f32 on the negated load with the constant appearing as
; 4.0, and that result stored.
; GCN-LABEL: {{^}}v_fneg_negk_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_negk_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  %a = load volatile float, ptr addrspace(1) %a.addr
  ; Minimum against the constant -4.0, then negated.
  %minimum = call float @llvm.minimum.f32(float %a, float -4.0)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, 0.0)): the constant operand is zero. Unlike the non-zero
; constant cases, the checks below expect the operation to remain
; v_minimum_f32 on the unmodified load with a 0 operand.
; GCN-LABEL: {{^}}v_fneg_0_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], [[A]], 0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_0_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  %a = load volatile float, ptr addrspace(1) %a.addr
  ; Minimum against +0.0, then negated.
  %minimum = call float @llvm.minimum.f32(float %a, float 0.0)
  %neg = fneg float %minimum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, 0.0)) whose only user is an fmul. The checks below expect
; the minimum to stay v_minimum_f32 with a 0 operand and the negation to show
; up as a negated source on the following v_mul_f32.
; GCN-LABEL: {{^}}v_fneg_0_minimum_foldable_use_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[MIN:v[0-9]+]], [[A]], 0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_0_minimum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; All three buffers are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  ; Volatile loads of both operands, in a-then-b order.
  %a = load volatile float, ptr addrspace(1) %a.addr
  %b = load volatile float, ptr addrspace(1) %b.addr
  ; Negated minimum-with-zero feeds a multiply by the second loaded value.
  %minimum = call float @llvm.minimum.f32(float %a, float 0.0)
  %neg = fneg float %minimum
  %prod = fmul float %neg, %b
  store float %prod, ptr addrspace(1) %out.addr
  ret void
}

; fneg(minimum(a, b)) where the minimum has a second, un-negated user
; (fmul by 4.0). The checks below expect a v_maximum_f32 on the negated loads
; to be stored as the fneg result, and the multiply to be expressed in terms
; of that value: by -4.0 under the SDAG prefix, or with a negated source and
; 4.0 under the GISEL prefix.
; GCN-LABEL: {{^}}v_fneg_minimum_multi_use_minimum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-SDAG:  v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MAX0]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MAX0]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
define void @v_fneg_minimum_multi_use_minimum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; Only the two inputs are indexed by the workitem id; both results are
  ; stored straight to %out, so no per-lane output address is computed.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minimum.f32(float %a, float %b)
  ; Two users of %min: the negation and a plain multiply.
  %fneg = fneg float %min
  %use1 = fmul float %min, 4.0
  ; Volatile stores of both results, fneg first.
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------------------
; fmaximum tests
; --------------------------------------------------------------------------------

; fneg(maximum(a, b)) on two loaded values. The checks below expect a single
; v_minimum_f32 whose two sources are the negated loads, with that result
; being the value that is stored.
; GCN-LABEL: {{^}}v_fneg_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; All three buffers are indexed by the sign-extended workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  ; Pattern under test: the negation applied to the maximum result.
  %max = call float @llvm.maximum.f32(float %a, float %b)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, a)): both operands of the maximum are the same loaded value.
; The checks below expect v_minimum_f32 with the negated load used for both
; sources, and that result stored.
; GCN-LABEL: {{^}}v_fneg_self_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_self_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  ; Same value on both sides of the maximum, then negated.
  %max = call float @llvm.maximum.f32(float %a, float %a)
  %max.fneg = fneg float %max
  store float %max.fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, 4.0)): one operand is a positive constant. The checks below
; expect v_minimum_f32 on the negated load with the constant appearing as
; -4.0, and that result stored.
; GCN-LABEL: {{^}}v_fneg_posk_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_posk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  ; Maximum against the constant +4.0, then negated.
  %max = call float @llvm.maximum.f32(float %a, float 4.0)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, -4.0)): one operand is a negative constant. The checks below
; expect v_minimum_f32 on the negated load with the constant appearing as
; 4.0, and that result stored.
; GCN-LABEL: {{^}}v_fneg_negk_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_minimum_f32 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_negk_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  ; Maximum against the constant -4.0, then negated.
  %max = call float @llvm.maximum.f32(float %a, float -4.0)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; fneg(maximum(a, 0.0)): the constant operand is zero. Unlike the non-zero
; constant cases, the checks below expect the operation to remain
; v_maximum_f32 on the unmodified load with a 0 operand.
; GCN-LABEL: {{^}}v_fneg_0_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: v_maximum_f32 [[RESULT:v[0-9]+]], [[A]], 0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_0_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
  ; Input and output are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  %a = load volatile float, ptr addrspace(1) %a.addr
  ; Maximum against +0.0, then negated.
  %maximum = call float @llvm.maximum.f32(float %a, float 0.0)
  %neg = fneg float %maximum
  store float %neg, ptr addrspace(1) %out.addr
  ret void
}

; fneg(maximum(a, 0.0)) whose only user is an fmul. The checks below expect
; the maximum to stay v_maximum_f32 with a 0 operand and the negation to show
; up as a negated source on the following v_mul_f32.
; GCN-LABEL: {{^}}v_fneg_0_maximum_foldable_use_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_maximum_f32 [[MAX:v[0-9]+]], [[A]], 0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[RESULT]]
define void @v_fneg_0_maximum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; All three buffers are indexed by the sign-extended workitem id.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %lane.i64 = sext i32 %lane to i64
  %a.addr = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %lane.i64
  %b.addr = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %lane.i64
  %out.addr = getelementptr inbounds float, ptr addrspace(1) %out, i64 %lane.i64
  ; Volatile loads of both operands, in a-then-b order.
  %a = load volatile float, ptr addrspace(1) %a.addr
  %b = load volatile float, ptr addrspace(1) %b.addr
  ; Negated maximum-with-zero feeds a multiply by the second loaded value.
  %maximum = call float @llvm.maximum.f32(float %a, float 0.0)
  %neg = fneg float %maximum
  %prod = fmul float %neg, %b
  store float %prod, ptr addrspace(1) %out.addr
  ret void
}

; fneg(maximum(a, b)) where the maximum has a second, un-negated user
; (fmul by 4.0). The checks below expect a v_minimum_f32 on the negated loads
; to be stored as the fneg result, and the multiply to be expressed in terms
; of that value: by -4.0 under the SDAG prefix, or with a negated source and
; 4.0 under the GISEL prefix.
; GCN-LABEL: {{^}}v_fneg_maximum_multi_use_maximum_f32:
; GCN: global_load_b32 [[A:v[0-9]+]]
; GCN: global_load_b32 [[B:v[0-9]+]]
; GCN: v_minimum_f32 [[MIN0:v[0-9]+]], -[[A]], -[[B]]
; GCN-SDAG:  v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MIN0]]
; GCN-GISEL: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[MIN0]], 4.0
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MIN0]]
; GCN: global_store_b32 v[{{[0-9:]+}}], [[MUL1]]
define void @v_fneg_maximum_multi_use_maximum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) {
  ; Only the two inputs are indexed by the workitem id; both results are
  ; stored straight to %out, so no per-lane output address is computed.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %max = call float @llvm.maximum.f32(float %a, float %b)
  ; Two users of %max: the negation and a plain multiply.
  %fneg = fneg float %max
  %use1 = fmul float %max, 4.0
  ; Volatile stores of both results, fneg first.
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; Intrinsics called by the test functions in this file: the workitem id used
; to index the buffers, and the f32 minimum/maximum operations under test.
declare i32 @llvm.amdgcn.workitem.id.x()
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)