llvm/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-shift-tbz-tbnz.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
#
# Check folding a constant G_SHL, G_ASHR, or G_LSHR into a G_BRCOND which has
# been matched as a TB(N)Z, and check the cases where the fold must not happen.
...
---
# fold_shl: a G_SHL by a constant 1 feeding the tested value is folded away.
# %bit is 8 (only bit 3 set), so the G_AND / G_ICMP ne 0 / G_BRCOND chain
# branches when bit 3 of the shift result is set. The autogenerated assertions
# expect no shift instruction in the output and a TBNZW on bit 2 of the sub_32
# subregister of %copy.
name:            fold_shl
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_shl
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64all = COPY $x0
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 2, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; Bit 3 of (x << 1) is bit 2 of x, so:
    ;   tbnz (shl x, 1), 3 == tbnz x, 2
    %fold_cst:gpr(s64) = G_CONSTANT i64 1
    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
# dont_fold_shl_1: the constant shift amount (5) is larger than the tested bit
# index (3, from %bit == 8), so the G_SHL must not be folded into the branch.
# The autogenerated assertions expect the shift to remain as
# UBFMXri %copy, 59, 58 and the TBNZW to test bit 3 of the shift result.
name:            dont_fold_shl_1
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_shl_1
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   %fold_me:gpr64 = UBFMXri %copy, 59, 58
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; 5 > 3, so "bit - shift" would be a negative bit index; we cannot do the
    ; transformation that fold_shl does.
    %fold_cst:gpr(s64) = G_CONSTANT i64 5
    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
# dont_fold_shl_2: like dont_fold_shl_1, but the shift amount is the negative
# constant -5. The autogenerated assertions expect the constant to be
# materialized (MOVi64imm -5), the shift to remain as a register-register
# LSLVXr, and the TBNZW to test bit 3 of the shift result.
name:            dont_fold_shl_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_shl_2
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   %fold_cst:gpr64 = MOVi64imm -5
  ; CHECK-NEXT:   %fold_me:gpr64 = LSLVXr %copy, %fold_cst
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; Same case as dont_fold_shl_1, except the shift amount is negative, i.e.
    ; it wraps around to a very large value when read as unsigned.
    %fold_cst:gpr(s64) = G_CONSTANT i64 -5
    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# dont_fold_shl_3: the shift amount (2) is small enough relative to the tested
# bit (3), but %shl has a second user (the G_OR in bb.1). The autogenerated
# assertions expect the shift to be kept (UBFMXri %copy, 62, 61), the TBNZW to
# test bit 3 of %shl itself, and the G_OR to select to an ORRXri on %shl.
name:            dont_fold_shl_3
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_shl_3
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   %shl:gpr64 = UBFMXri %copy, 62, 61
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %shl.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   %second_use:gpr64sp = ORRXri %shl, 8000
  ; CHECK-NEXT:   $x0 = COPY %second_use
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0
    %fold_cst:gpr(s64) = G_CONSTANT i64 2

    ; Don't walk past the G_SHL when it's used more than once: %shl is also
    ; read by %second_use in bb.1, so the shift has to be emitted anyway.
    %shl:gpr(s64) = G_SHL %copy, %fold_cst
    %and:gpr(s64) = G_AND %shl, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0

  bb.1:
    ; Second user of %shl; this is what keeps the shift from being folded.
    %second_use:gpr(s64) = G_OR %shl, %bit
    $x0 = COPY %second_use
    RET_ReallyLR implicit $x0

...
---
# fold_ashr_in_range: a G_ASHR by a constant 1 feeding the tested value is
# folded away. %bit is 8 (bit 3), so the tested bit moves up to 3 + 1 = 4.
# The autogenerated assertions expect no shift instruction and a TBNZW on
# bit 4 of the sub_32 subregister of %copy.
name:            fold_ashr_in_range
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_ashr_in_range
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64all = COPY $x0
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 4, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; tb(n)z (ashr x, c), b == tb(n)z x, b + c when b + c is a valid bit index,
    ; i.e. b + c is less than the size of the type in bits.
    ; In this case, we should get 1 + 3 = 4 as the bit number.
    %fold_cst:gpr(s64) = G_CONSTANT i64 1
    %fold_me:gpr(s64) = G_ASHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# fold_ashr_msb_1: s32 G_ASHR by a constant (1234) far larger than the type
# width. With %bit == 8 (bit 3), 3 + 1234 is not a valid bit index for s32;
# the autogenerated assertions expect the shift to disappear and the most
# significant bit (31) of %copy to be tested with TBNZW.
# NOTE(review): liveins names $x0 while the body copies from $w0, and the
# assertions expect the same liveins line -- presumably harmless; confirm
# before changing.
name:            fold_ashr_msb_1
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_ashr_msb_1
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   TBNZW %copy, 31, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; 3 + 1234 is past the top of an s32, so the tested bit becomes the most
    ; significant bit. We should get a TBNZW with a 31 as the bit.
    %fold_cst:gpr(s32) = G_CONSTANT i32 1234
    %fold_me:gpr(s32) = G_ASHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# fold_ashr_msb_2: s64 counterpart of fold_ashr_msb_1. The G_ASHR amount
# (1234) plus the tested bit (3, from %bit == 8) is not a valid bit index for
# s64; the autogenerated assertions expect the shift to disappear and the
# most significant bit (63) of %copy to be tested with the 64-bit TBNZX.
name:            fold_ashr_msb_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_ashr_msb_2
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   TBNZX %copy, 63, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; 3 + 1234 is past the top of an s64, so the tested bit becomes the most
    ; significant bit. We should get a TBNZX with a 63 as the bit.
    %fold_cst:gpr(s64) = G_CONSTANT i64 1234
    %fold_me:gpr(s64) = G_ASHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# fold_lshr: s32 G_LSHR by a constant 1 feeding the tested value is folded
# away. %bit is 8 (bit 3), so the tested bit moves up to 3 + 1 = 4, which is
# still a valid s32 bit index. The autogenerated assertions expect no shift
# instruction and a TBNZW on bit 4 of %copy.
# NOTE(review): liveins names $x0 while the body copies from $w0, and the
# assertions expect the same liveins line -- presumably harmless; confirm
# before changing.
name:            fold_lshr
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_lshr
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   TBNZW %copy, 4, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; Bit 3 of (x >> 1) is bit 4 of x. We should get 4 as the test bit.
    %fold_cst:gpr(s32) = G_CONSTANT i32 1
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# fold_lshr_2: s64 G_LSHR by a constant 29 with %bit == 8 (bit 3). The sum
# 3 + 29 = 32 is still a valid bit index for s64, so the shift is folded. The
# autogenerated assertions expect no shift instruction and a 64-bit TBNZX on
# bit 32 of %copy. Compare with dont_fold_lshr, which uses the same constants
# on s32.
name:            fold_lshr_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_lshr_2
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   TBNZX %copy, 32, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; We're testing a s64, whose highest bit index is 63.
    ; 3 + 29 = 32, which is less than 63, so we can fold.
    %fold_cst:gpr(s64) = G_CONSTANT i64 29
    %fold_me:gpr(s64) = G_LSHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# dont_fold_lshr: s32 G_LSHR by a constant 29 with %bit == 8 (bit 3). The sum
# 3 + 29 = 32 is not a valid bit index for s32, so the shift must not be
# folded. The autogenerated assertions expect the constant to be materialized
# (MOVi32imm 29), the shift to remain as LSRVWr, and the TBNZW to test bit 3
# of the shift result.
# NOTE(review): liveins names $x0 while the body copies from $w0, and the
# assertions expect the same liveins line -- presumably harmless; confirm
# before changing.
name:            dont_fold_lshr
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_lshr
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   %fold_cst:gpr32 = MOVi32imm 29
  ; CHECK-NEXT:   %fold_me:gpr32 = LSRVWr %copy, %fold_cst
  ; CHECK-NEXT:   TBNZW %fold_me, 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; We're testing a s32, whose highest bit index is 31.
    ; 3 + 29 = 32, which is greater than 31, so we don't fold.
    %fold_cst:gpr(s32) = G_CONSTANT i32 29
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR

...
---
# lshr_negative: s32 G_LSHR whose constant shift amount is -1, with
# %bit == 8 (bit 3). The autogenerated assertions expect the shift to
# disappear and a TBNZW on bit 2 of %copy, which is consistent with the bit
# index being computed as 3 + (-1).
# NOTE(review): liveins names $x0 while the body copies from $w0, and the
# assertions expect the same liveins line -- presumably harmless; confirm
# before changing.
name:            lshr_negative
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: lshr_negative
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   TBNZW %copy, 2, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0
    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; Read as unsigned, the constant -1 becomes very large (it wraps around).
    ; Since it's larger than the bit width, the LSHR is poison, so any choice
    ; of tested bit is acceptable and we can still fold.
    %fold_cst:gpr(s32) = G_CONSTANT i32 -1
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR