# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX12
---
# f32 case: the G_AMDGPU_FMED3 carries the `nnan` flag; function mode is
# ieee=true, dx10-clamp=true.
# Input:    nnan fmed3(fmul(x, 2.0), 0.0, 1.0)
# Expected: under both check prefixes the fmed3 is replaced by
#           `nnan G_AMDGPU_CLAMP` of the G_FMUL result, and the 0.0/1.0
#           constants and their VGPR copies no longer appear.
name: test_fmed3_f32_known_nnan_ieee_true
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: true
    dx10-clamp: true
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_f32_known_nnan_ieee_true
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]]
    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_f32_known_nnan_ieee_true
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = nnan G_AMDGPU_CLAMP [[FMUL]]
    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    %0:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    %8:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_FMUL %0, %8
    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    %9:vgpr(s32) = COPY %5(s32)
    %10:vgpr(s32) = COPY %6(s32)
    %4:vgpr(s32) = nnan G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32)
    $vgpr0 = COPY %4(s32)
...
---
# f16 case: the G_AMDGPU_FMED3 carries the `nnan` flag; function mode is
# ieee=false, dx10-clamp=true.
# The s16 value is produced by G_TRUNC of $vgpr0 and returned via G_ANYEXT.
# Half constants used: 0xH4000 (2.0), 0xH0000 (0.0), 0xH3C00 (1.0).
# Input:    nnan fmed3(fmul(x, 2.0), 0.0, 1.0)
# Expected: under both check prefixes the fmed3 is replaced by an s16
#           `nnan G_AMDGPU_CLAMP` of the G_FMUL result.
name: test_fmed3_f16_known_nnan_ieee_false
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: true
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_f16_known_nnan_ieee_false
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]]
    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]]
    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16)
    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_f16_known_nnan_ieee_false
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s16) = G_FCONSTANT half 0xH4000
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s16) = COPY [[C]](s16)
    ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s16) = G_FMUL [[TRUNC]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s16) = nnan G_AMDGPU_CLAMP [[FMUL]]
    ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[AMDGPU_CLAMP]](s16)
    ; GFX12-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
    %2:vgpr(s32) = COPY $vgpr0
    %0:vgpr(s16) = G_TRUNC %2(s32)
    %3:sgpr(s16) = G_FCONSTANT half 0xH4000
    %10:vgpr(s16) = COPY %3(s16)
    %4:vgpr(s16) = G_FMUL %0, %10
    %7:sgpr(s16) = G_FCONSTANT half 0xH3C00
    %6:sgpr(s16) = G_FCONSTANT half 0xH0000
    %11:vgpr(s16) = COPY %6(s16)
    %12:vgpr(s16) = COPY %7(s16)
    %5:vgpr(s16) = nnan G_AMDGPU_FMED3 %4(s16), %11(s16), %12(s16)
    %9:vgpr(s32) = G_ANYEXT %5(s16)
    $vgpr0 = COPY %9(s32)
...
---
# f32 case without the `nnan` flag; function mode is ieee=true,
# dx10-clamp=true.
# The clamped value is G_FMINNUM_IEEE(G_FCANONICALIZE(x), 10.0); per the test
# name this is the "input is not a signaling NaN" scenario.
# Input:    fmed3(fminnum_ieee(fcanonicalize(x), 10.0), 0.0, 1.0)
# Expected: under both check prefixes the fmed3 is replaced by
#           G_AMDGPU_CLAMP (no fast-math flag) of the G_FMINNUM_IEEE result.
name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: true
    dx10-clamp: true
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01
    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]]
    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01
    ; GFX12-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GFX12-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]]
    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    %0:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s32) = G_FCONSTANT float 1.000000e+01
    %8:vgpr(s32) = G_FCANONICALIZE %0
    %9:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_FMINNUM_IEEE %8, %9
    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    %10:vgpr(s32) = COPY %5(s32)
    %11:vgpr(s32) = COPY %6(s32)
    %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %10(s32), %11(s32)
    $vgpr0 = COPY %4(s32)
...
---
# f32 case without the `nnan` flag; function mode is ieee=true,
# dx10-clamp=true.
# Unlike the other f32 tests that feed (value, 0.0, 1.0), the constants are
# swapped here so that the THIRD fmed3 operand is 0.0.
# Input:    fmed3(fmul(x, 2.0), 1.0, 0.0)
# Expected: under both check prefixes the fmed3 is replaced by
#           G_AMDGPU_CLAMP of the G_FMUL result.
name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: true
    dx10-clamp: true
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    %0:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    %8:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_FMUL %0, %8
    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    %5:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    %9:vgpr(s32) = COPY %5(s32)
    %10:vgpr(s32) = COPY %6(s32)
    %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32)
    $vgpr0 = COPY %4(s32)
...
# FIXME: Add tests for functions that carry the attribute "no-nans-fp-math"="true" (e.g. attributes #3 = {"no-nans-fp-math"="true"}).
---
# f32 case without the `nnan` flag; function mode is ieee=false,
# dx10-clamp=true.
# Input:    fmed3(fmul(x, 2.0), 1.0, 0.0)
# Expected: the two check prefixes differ.
#   CHECK (gfx1010 run line): the G_AMDGPU_FMED3, both constants and their
#                             copies are left in place.
#   GFX12 (gfx1200 run line): the fmed3 is replaced by G_AMDGPU_CLAMP of the
#                             G_FMUL result.
name: test_fmed3_f32_maybe_NaN_ieee_false
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: true
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_f32_maybe_NaN_ieee_false
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]]
    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_f32_maybe_NaN_ieee_false
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    %0:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    %8:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_FMUL %0, %8
    %6:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    %5:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    %9:vgpr(s32) = COPY %5(s32)
    %10:vgpr(s32) = COPY %6(s32)
    %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32)
    $vgpr0 = COPY %4(s32)
...
---
# f32 case without the `nnan` flag; function mode is ieee=true,
# dx10-clamp=false.
# Same instruction sequence as the dx10clamp_true variant of this test name;
# only the dx10-clamp mode bit differs.
# Input:    fmed3(fminnum_ieee(fcanonicalize(x), 10.0), 0.0, 1.0)
# Expected: the two check prefixes differ.
#   CHECK (gfx1010 run line): the G_AMDGPU_FMED3, both constants and their
#                             copies are left in place.
#   GFX12 (gfx1200 run line): the fmed3 is replaced by G_AMDGPU_CLAMP of the
#                             G_FMINNUM_IEEE result.
name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: true
    dx10-clamp: false
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01
    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32)
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
    ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMINNUM_IEEE]], [[COPY2]], [[COPY3]]
    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+01
    ; GFX12-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]]
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GFX12-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMINNUM_IEEE]]
    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    %0:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s32) = G_FCONSTANT float 1.000000e+01
    %8:vgpr(s32) = G_FCANONICALIZE %0
    %9:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_FMINNUM_IEEE %8, %9
    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    %10:vgpr(s32) = COPY %5(s32)
    %11:vgpr(s32) = COPY %6(s32)
    %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %10(s32), %11(s32)
    $vgpr0 = COPY %4(s32)
...
---
# f32 case without the `nnan` flag; function mode is ieee=true,
# dx10-clamp=true.
# The clamped value comes from a plain G_FMUL and the constants are in
# (0.0, 1.0) order, i.e. the third fmed3 operand is 1.0, not 0.0.
# Input:    fmed3(fmul(x, 2.0), 0.0, 1.0)
# Expected: under both check prefixes the fmed3 is replaced by
#           G_AMDGPU_CLAMP of the G_FMUL result.
name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo:
  mode:
    ieee: true
    dx10-clamp: true
body: |
  bb.1 :
    liveins: $vgpr0
    ; CHECK-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
    ; CHECK: liveins: $vgpr0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    ;
    ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true
    ; GFX12: liveins: $vgpr0
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]]
    ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]]
    ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32)
    %0:vgpr(s32) = COPY $vgpr0
    %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00
    %8:vgpr(s32) = COPY %2(s32)
    %3:vgpr(s32) = G_FMUL %0, %8
    %6:sgpr(s32) = G_FCONSTANT float 1.000000e+00
    %5:sgpr(s32) = G_FCONSTANT float 0.000000e+00
    %9:vgpr(s32) = COPY %5(s32)
    %10:vgpr(s32) = COPY %6(s32)
    %4:vgpr(s32) = G_AMDGPU_FMED3 %3(s32), %9(s32), %10(s32)
    $vgpr0 = COPY %4(s32)
...