llvm/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s

# Test that we fold the correct element from G_UNMERGE_VALUES into the fma

---
# fadd (fmul x, y), el1  ->  fma x, y, el1
# el1 is the second result of the G_UNMERGE_VALUES of the loaded <2 x s32>.
# The input instructions carry no fast-math flags and this function sets no
# machineFunctionInfo mode.
name: test_f32_add_mul
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %6, %el1
    $vgpr0 = COPY %7(s32)
...

---
# fadd el1, (fmul x, y)  ->  fma x, y, el1
# Same as test_f32_add_mul but with the unmerged element as the LHS of the
# G_FADD; the expected G_FMA still uses %el1 as the addend.
name: test_f32_add_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %el1, %6
    $vgpr0 = COPY %7(s32)
...

---
# fadd (fpext (fmul x, y)), el1  ->  fma (fpext x), (fpext y), el1
# x and y are s16 values truncated from the SGPR inputs; the multiply is done
# in s16 and extended to s32 before the add. The expected output extends each
# operand separately and feeds %el1 to the G_FMA as the addend.
name: test_f16_f32_add_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...

---
# fadd el1, (fpext (fmul x, y))  ->  fma (fpext x), (fpext y), el1
# Commuted form of test_f16_f32_add_ext_mul: the unmerged element is the LHS
# of the G_FADD and the extended s16 multiply is the RHS.
name: test_f16_f32_add_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...

---
# fadd (fma x, y, (fmul u, v)), el1  ->  fma x, y, (fma u, v, el1)
# The unmerged element becomes the addend of the inner G_FMA, which in turn is
# the addend of the outer G_FMA. This function sets no machineFunctionInfo
# mode; the input instructions carry fast-math flags.
name: test_f32_add_fma_mul
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...

---
# fadd el1, (fma x, y, (fmul u, v))  ->  fma x, y, (fma u, v, el1)
# Commuted form of test_f32_add_fma_mul: the unmerged element is the LHS of
# the G_FADD. This function sets no machineFunctionInfo mode.
name: test_f32_add_fma_mul_rhs
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...

---
# fadd (fma x, y, (fpext (fmul u, v))), el1
#   ->  fma x, y, (fma (fpext u), (fpext v), el1)
# x and y are s32; u and v are s16 values truncated from $vgpr4/$vgpr5. The
# unmerged element becomes the addend of the inner G_FMA.
name: test_f16_f32_add_fma_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %0, %1, %11
    %13:_(s32) = G_FADD %12, %el1
    $vgpr0 = COPY %13(s32)
...

---
# fadd (fpext (fadd (fmul a, b), (fmul c, d))), el1
#   ->  fma (fpext a), (fpext b), (fma (fpext c), (fpext d), el1)
# All of a, b, c, d are s16. The input below spells the inner operation as an
# s16 G_FADD of two G_FMULs rather than as an s16 G_FMA. The unmerged element
# ends up as the addend of the inner G_FMA.
name: test_f16_f32_add_ext_fma_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $vgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %1, %3
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %15, %el1
    $vgpr0 = COPY %16(s32)
...

---
# fadd el1, (fma x, y, (fpext (fmul u, v)))
#   ->  fma x, y, (fma (fpext u), (fpext v), el1)
# Commuted form of test_f16_f32_add_fma_ext_mul: the unmerged element is the
# LHS of the G_FADD. Here the load/unmerge precedes the scalar operand copies.
name: test_f16_f32_add_fma_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s32) = COPY $vgpr3
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %4, %5, %11
    %13:_(s32) = G_FADD %el1, %12
    $vgpr0 = COPY %13(s32)
...

---
# fadd el1, (fpext (fadd (fmul a, b), (fmul c, d)))
#   ->  fma (fpext a), (fpext b), (fma (fpext c), (fpext d), el1)
# Commuted form of test_f16_f32_add_ext_fma_mul: the unmerged element is the
# LHS of the outer G_FADD. All of a, b, c, d are s16.
name: test_f16_f32_add_ext_fma_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s16) = G_TRUNC %4(s32)
    %6:_(s32) = COPY $vgpr3
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %5, %7
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %el1, %15
    $vgpr0 = COPY %16(s32)
...

---
# fsub (fmul x, y), el1  ->  fma x, y, (fneg el1)
# The unmerged element is the subtrahend, so the expected output negates %el1
# and uses it as the G_FMA addend.
# The pointer is read from $vgpr2_vgpr3, the 64-bit register declared in
# liveins, so it does not alias the $vgpr0/$vgpr1 multiply operands.
name: test_f32_sub_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %6, %el1
    $vgpr0 = COPY %7(s32)
...

---
# fsub el1, (fmul x, y)  ->  fma (fneg x), y, el1
# The unmerged element is the minuend, so the expected output negates the
# first multiply operand and keeps %el1 as the G_FMA addend.
name: test_f32_sub_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %el1, %6
    $vgpr0 = COPY %7(s32)
...