llvm/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s

---
name:            splat_4xi32
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0

    ; Splat of an s32 scalar (copied from $w0) across a <4 x s32> vector.
    ; The scalar is inserted at index 0 of an undef vector and the all-zero
    ; shuffle mask repeats that lane, so the insert + shuffle pair should be
    ; replaced by a single G_DUP of the scalar.
    ;
    ; CHECK-LABEL: name: splat_4xi32
    ; CHECK: liveins: $w0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s32) = COPY $w0
    %2:_(<4 x s32>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64)
    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
    $q0 = COPY %4(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xi64
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $x0

    ; Same pattern as splat_4xi32, but with an s64 scalar (copied from $x0)
    ; splatted across a <2 x s64> vector. We should get a G_DUP.
    ;
    ; CHECK-LABEL: name: splat_2xi64
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s64) = COPY $x0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xi32
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0

    ; Splat of an s32 scalar (copied from $w0) across a <2 x s32> vector,
    ; with the result returned in $d0 instead of $q0. We should get a G_DUP.
    ;
    ; CHECK-LABEL: name: splat_2xi32
    ; CHECK: liveins: $w0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<2 x s32>) = G_DUP [[COPY]](s32)
    ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(s32) = COPY $w0
    %2:_(<2 x s32>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64)
    %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
    $d0 = COPY %4(<2 x s32>)
    RET_ReallyLR implicit $d0

...
---
name:            splat_4xf32
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $s0

    ; Same as splat_4xi32, except that the s32 scalar is copied from $s0
    ; rather than $w0. The source register should make no difference: we
    ; should still get a G_DUP.
    ;
    ; CHECK-LABEL: name: splat_4xf32
    ; CHECK: liveins: $s0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s32) = COPY $s0
    %2:_(<4 x s32>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64)
    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
    $q0 = COPY %4(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xf64
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $d0

    ; Same as splat_2xi64, except that the s64 scalar is copied from $d0
    ; rather than $x0. We should still get a G_DUP.
    ;
    ; CHECK-LABEL: name: splat_2xf64
    ; CHECK: liveins: $d0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s64) = COPY $d0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_2xf32
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $s0

    ; Same as splat_2xi32, except that the s32 scalar is copied from $s0
    ; rather than $w0. We should still get a G_DUP.
    ;
    ; CHECK-LABEL: name: splat_2xf32
    ; CHECK: liveins: $s0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<2 x s32>) = G_DUP [[COPY]](s32)
    ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(s32) = COPY $s0
    %2:_(<2 x s32>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64)
    %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
    $d0 = COPY %4(<2 x s32>)
    RET_ReallyLR implicit $d0

...
---
name:            splat_2xf64_copies
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $d0

    ; This test is exactly the same as splat_2xf64, except it adds two copies:
    ; %6 copies the undef vector before it is used by the insert, and %7
    ; copies the insert result before it is used by the shuffle.
    ; These copies shouldn't get in the way of matching the dup pattern, and
    ; neither of them should remain in the output.
    ;
    ; CHECK-LABEL: name: splat_2xf64_copies
    ; CHECK: liveins: $d0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s64) = COPY $d0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %6:_(<2 x s64>) = COPY %2
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s64)
    %7:_(<2 x s64>) = COPY %1
    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            not_all_zeros
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $x0
    ; Make sure that we don't do the optimization when it's not all zeroes.
    ;
    ; The mask (0, 1) picks lanes 0 and 1 of the first source in order, so it
    ; is not a splat. No G_DUP may be formed; the expected output keeps the
    ; G_INSERT_VECTOR_ELT and replaces the shuffle with a plain COPY of its
    ; result.
    ;
    ; CHECK-LABEL: name: not_all_zeros
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[IVEC]](<2 x s64>)
    ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s64) = COPY $x0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            all_undef
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $x0
    ; If all the elements are undefined, we consider it a splat. In this case,
    ; we can choose 0 as our index.
    ;
    ; Both mask entries below are -1, i.e. undef lanes.
    ;
    ; We should get a G_DUP here.
    ;
    ; CHECK-LABEL: name: all_undef
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s64) = COPY $x0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64)
    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(-1, -1)
    $q0 = COPY %4(<2 x s64>)
    RET_ReallyLR implicit $q0

...
---
name:            one_undef
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $s0
    ; Make sure we can skip past undef values.
    ;
    ; The mask is (0, -1, 0, 0): every defined lane selects index 0 and the
    ; single -1 lane is undef, so this still counts as a splat of lane 0.
    ;
    ; We should get a G_DUP here.
    ;
    ; CHECK-LABEL: name: one_undef
    ; CHECK: liveins: $s0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
    ; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32)
    ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s32) = COPY $s0
    %2:_(<4 x s32>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64)
    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, -1, 0, 0)
    $q0 = COPY %4(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            not_all_zeros_with_undefs
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $s0
    ; The mask is (-1, 0, 0, 3). Even when the undef lane is ignored, the
    ; defined lanes select two different indices (0 and 3), so this is not a
    ; splat.
    ;
    ; We should not get a G_DUP here; the G_SHUFFLE_VECTOR must be left as is.
    ;
    ; CHECK-LABEL: name: not_all_zeros_with_undefs
    ; CHECK: liveins: $s0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s64)
    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3)
    ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %0:_(s32) = COPY $s0
    %2:_(<4 x s32>) = G_IMPLICIT_DEF
    %3:_(s64) = G_CONSTANT i64 0
    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64)
    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3)
    $q0 = COPY %4(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
name:            splat_4xi16
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $h0
    ; Splat of an s16 scalar (copied from $h0) across a <4 x s16> vector.
    ; This test uses named virtual registers, so the expected G_DUP defines
    ; %splat directly.
    ;
    ; CHECK-LABEL: name: splat_4xi16
    ; CHECK: liveins: $h0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: %copy:_(s16) = COPY $h0
    ; CHECK-NEXT: %splat:_(<4 x s16>) = G_DUP %copy(s16)
    ; CHECK-NEXT: $d0 = COPY %splat(<4 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %copy:_(s16) = COPY $h0
    %undef:_(<4 x s16>) = G_IMPLICIT_DEF
    %cst:_(s64) = G_CONSTANT i64 0
    %ins:_(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s64)
    %splat:_(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0)
    $d0 = COPY %splat(<4 x s16>)
    RET_ReallyLR implicit $d0

...
---
name:            splat_8xi8
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0
    ; Splat across a <8 x s8> vector where the inserted scalar is an s32
    ; (copied from $w0), i.e. wider than the s8 element type. The expected
    ; G_DUP takes the s32 scalar as its operand unchanged.
    ;
    ; CHECK-LABEL: name: splat_8xi8
    ; CHECK: liveins: $w0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
    ; CHECK-NEXT: %splat:_(<8 x s8>) = G_DUP %copy(s32)
    ; CHECK-NEXT: $d0 = COPY %splat(<8 x s8>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %copy:_(s32) = COPY $w0
    %undef:_(<8 x s8>) = G_IMPLICIT_DEF
    %cst:_(s64) = G_CONSTANT i64 0
    %ins:_(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s64)
    %splat:_(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
    $d0 = COPY %splat(<8 x s8>)
    RET_ReallyLR implicit $d0

...
---
name:            build_vector
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0, $w1, $w2, $w3
    ; The G_SHUFFLE_VECTOR is fed by a G_BUILD_VECTOR, and the 0th input
    ; operand is not a constant. We should get a G_DUP.
    ;
    ; The all-zero mask only reads lane 0 of the first source, which is
    ; %lane. The G_DUP should therefore use %lane directly, and the
    ; G_BUILD_VECTOR and its other operands (%b, %c, %d) should not appear
    ; in the output.
    ;
    ; CHECK-LABEL: name: build_vector
    ; CHECK: liveins: $w0, $w1, $w2, $w3
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: %lane:_(s32) = COPY $w0
    ; CHECK-NEXT: %shuf:_(<4 x s32>) = G_DUP %lane(s32)
    ; CHECK-NEXT: $q0 = COPY %shuf(<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %lane:_(s32) = COPY $w0
    %b:_(s32) = COPY $w1
    %c:_(s32) = COPY $w2
    %d:_(s32) = COPY $w3
    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
    %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
    $q0 = COPY %shuf(<4 x s32>)
    RET_ReallyLR implicit $q0
 
...
---
name:            build_vector_rhs
alignment:       4
legalized:       true
tracksRegLiveness: true
body:             |
  bb.1.entry:
    liveins: $w0, $w1, $w2, $w3, $w4
    ; Both G_SHUFFLE_VECTOR sources are G_BUILD_VECTORs. With <4 x s32>
    ; sources, mask index 4 selects lane 0 of the second (RHS) source, which
    ; is %lane_1. We should get a G_DUP of %lane_1, not of %lane_0.
    ;
    ; CHECK-LABEL: name: build_vector_rhs
    ; CHECK: liveins: $w0, $w1, $w2, $w3, $w4
    ; CHECK: %lane_1:_(s32) = COPY $w1
    ; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane_1(s32)
    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %lane_0:_(s32) = COPY $w0
    %lane_1:_(s32) = COPY $w1
    %b:_(s32) = COPY $w2
    %c:_(s32) = COPY $w3
    %d:_(s32) = COPY $w4
    %buildvec0:_(<4 x s32>) = G_BUILD_VECTOR %lane_0(s32), %b(s32), %c(s32), %d(s32)
    %buildvec1:_(<4 x s32>) = G_BUILD_VECTOR %lane_1(s32), %b(s32), %c(s32), %d(s32)
    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec0(<4 x s32>), %buildvec1, shufflemask(4, 4, 4, 4)
    $q0 = COPY %shuf(<4 x s32>)
    RET_ReallyLR implicit $q0