# llvm/llvm/test/CodeGen/ARM/ifcvt-size.mir

# RUN: llc -mtriple=thumbv8a-unknown-linux-gnueabi %s -o - -run-pass=if-converter -debug-only=if-converter | FileCheck %s
# RUN: llc -mtriple=thumbv7-unknown-linux-gnueabi %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s
# RUN: FileCheck %s < %t --check-prefix=DEBUG
# REQUIRES: asserts

# When optimising for size, we use a different set of heuristics for
# if-conversion, which take into account the size of the instructions, not the
# time taken to execute them. This is more complicated for Thumb, where it is
# also affected by selection of narrow branch instructions, insertion of IT
# instructions, and selection of the CB(N)Z instructions.

--- |

  ; Placeholder IR for the MIR function fn1 in this file. It carries the
  ; minsize attribute and declares the IR block names (entry, if.then,
  ; if.else, if.end) that the machine basic blocks are labelled with. Every IR
  ; block is just `unreachable`.
  define void @fn1() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn2 in this file. It carries the
  ; minsize attribute and declares the IR block names (entry, if.then,
  ; if.else, if.end) that the machine basic blocks are labelled with. Every IR
  ; block is just `unreachable`.
  define void @fn2() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn3 in this file. It carries the
  ; minsize attribute and declares the IR block names entry, if.then, if.else
  ; and if.end; the MIR body of fn3 only has blocks labelled with the first
  ; three. Every IR block is just `unreachable`.
  define void @fn3() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn4 in this file. Besides minsize it
  ; sets "target-features"="-thumb-mode", so this function is built as ARM
  ; rather than Thumb code even though the test's triples are Thumb ones. Every
  ; IR block is just `unreachable`; the block names label the machine blocks.
  define void @fn4() minsize "target-features"="-thumb-mode" {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn5 in this file. It carries the
  ; minsize attribute and declares the IR block names entry, if.then, if.else
  ; and if.end; the MIR body of fn5 only has blocks labelled with the first
  ; three. Every IR block is just `unreachable`.
  define void @fn5() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn6 in this file. It carries the
  ; minsize attribute and declares the IR block names used by the machine
  ; blocks and branch targets of that function: entry, if.then, if.else plus
  ; the two exits if2.then and if2.else. Every IR block is just `unreachable`.
  define void @fn6() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if2.then:
    unreachable
  if2.else:
    unreachable
  }

  ; Placeholder IR for the MIR function fn7 in this file. Besides minsize it
  ; sets "target-features"="-thumb-mode", so this function is built as ARM
  ; rather than Thumb code even though the test's triples are Thumb ones. Every
  ; IR block is just `unreachable`; the block names label the machine blocks.
  define void @fn7() minsize "target-features"="-thumb-mode" {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn8 in this file. It carries the
  ; minsize attribute and declares the IR block names (entry, if.then,
  ; if.else, if.end) that the machine basic blocks are labelled with. Every IR
  ; block is just `unreachable`.
  define void @fn8() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  if.end:
    unreachable
  }

  ; Placeholder IR for the MIR function fn9 in this file. It carries the
  ; minsize attribute and declares the IR block names used by the machine
  ; blocks. The lab1 block is the one whose address the MIR body takes via
  ; blockaddress(@fn9, %ir-block.lab1). Every IR block is just `unreachable`.
  define void @fn9() minsize {
  entry:
    unreachable
  if.then:
    unreachable
  if.else:
    unreachable
  lab1:
    unreachable
  }
...
---
name:            fn1
alignment:       1
tracksRegLiveness: true

# If-conversion is profitable here because it will remove two branches of 2
# bytes each (assuming they can become narrow branches later), and will only
# add 2 bytes with the IT instruction.
#
# Shape of the input below: a plain diamond. bb.0 compares r2 with 5 and
# conditionally branches (condition operand 11) to the else block. The then
# block holds one load and ends in an unconditional branch to bb.3; the else
# block holds three loads and falls through to bb.3. That gives the four
# predicated instructions and the two removable branches that the expected
# size-limit trace line reports.
#
# The expected output is straight-line code: the compare, then the four loads
# (then-block load first), then the first instruction of bb.3.

# CHECK-LABEL: name:            fn1
# CHECK:      t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2MOVi

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body:             |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    t2B %bb.3, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name:            fn2
alignment:       1
tracksRegLiveness: true

# If-conversion is not profitable here, because the 5 conditional instructions
# would require 2 IT instructions.
#
# Shape of the input below: the same diamond as fn1, except that the then
# block holds two loads instead of one. Together with the three loads in the
# else block that makes five instructions to predicate. The expected output
# therefore still has the conditional branch directly after the compare.

# CHECK-LABEL: name:            fn2
# CHECK:      t2CMPri
# CHECK-NEXT: t2Bcc

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4)

body:             |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    t2B %bb.3, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name:            fn3
alignment:       1
tracksRegLiveness: true

# Here, the true and false blocks both end in a tBX_RET instruction. One of
# these will be removed, saving 2 bytes, and the remaining one isn't
# conditional, so doesn't push us over the limit of 4 instructions in an IT
# block.
#
# Shape of the input below: bb.0 compares r2 with 5 and conditionally branches
# to the else block. Neither the then block (one load) nor the else block
# (three loads) has a successor; each returns on its own. There is no join
# block, so the only branch to remove is the conditional one in bb.0, and the
# shared return is what the expected trace line counts as common bytes.

# CHECK-LABEL: name:            fn3
# CHECK:      t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: tBX_RET

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body:             |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

  bb.2.if.else:
    liveins: $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name:            fn4
alignment:       1
tracksRegLiveness: true

# This is the same as fn2, but compiled for ARM, which doesn't need IT
# instructions, so if-conversion is profitable.
#
# Shape of the input below: the fn2 diamond (two loads in the then block,
# three in the else block) written with ARM-mode opcodes (CMPri, Bcc, LDRi12,
# LDRSH, B, ...). The expected trace line reports five predicated instructions
# with zero extra predicate bytes. The expected output lists the else-block
# loads ahead of the then-block loads, followed by the first instruction of
# bb.3.

# CHECK-LABEL: name:            fn4
# CHECK:      CMPri
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRSH
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRi12
# CHECK-NEXT: MOVi

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0)

body:             |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    B %bb.3

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = MOVi 0, 14, $noreg, $noreg
    STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    BX_RET 14, $noreg, implicit $r0

---
name:            fn5
alignment:       1
tracksRegLiveness: true

# Here, the compare and conditional branch can be turned into a single
# compare-and-branch instruction (CB(N)Z), so we don't want to if-convert.
#
# Shape of the input below: the same two returning blocks as fn3 (one load in
# the then block, three in the else block), but bb.0 compares r2 with 0 and
# branches on condition operand 1. The expected trace line reports zero branch
# bytes saved, and the expected output keeps both the compare and the
# conditional branch.

# CHECK-LABEL: name:            fn5
# CHECK: t2CMPri
# CHECK: t2Bcc

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body:             |
  bb.0.entry:
    successors: %bb.1(0x30000000), %bb.2(0x50000000)
    liveins: $r0, $r1, $r2

    t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 1, killed $cpsr

  bb.1.if.then:
    liveins: $r0

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

  bb.2.if.else:
    liveins: $r1

    renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name:            fn6
alignment:       1
tracksRegLiveness: true

# This is a forked-diamond pattern, we recognise that the conditional branches
# at the ends of the true and false blocks are the same, and can be shared.
#
# Shape of the input below: bb.0 compares r2 with 4 and conditionally branches
# to the else block. The then block (three loads) and the else block (one
# load) both finish with the same three-instruction tail: compare r0 with 0,
# conditional branch to bb.3, unconditional branch to bb.4. Both list bb.3 and
# bb.4 as successors. The expected output keeps a single copy of that tail
# (compare followed by conditional branch) after the four loads, with the
# else-block load first.

# CHECK-LABEL: name:            fn6
# CHECK:      t2CMPri
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2CMPri
# CHECK-NEXT: t2Bcc

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body:             |
  bb.0.entry:
    successors: %bb.1(0x30000000), %bb.2(0x50000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 1, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x30000000), %bb.4(0x50000000)
    liveins: $r0, $r1, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.3.if2.then, 1, killed $cpsr
    t2B %bb.4.if2.else, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x30000000), %bb.4(0x50000000)
    liveins: $r0, $r1, $r3

    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.3.if2.then, 1, killed $cpsr
    t2B %bb.4.if2.else, 14, $noreg

  bb.3.if2.then:
    liveins: $r0, $r1, $r3

    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

  bb.4.if2.else:
    liveins: $r0

    tBX_RET 14, $noreg, implicit $r0

---
name:            fn7
alignment:       1
tracksRegLiveness: true

# When compiling for ARM, it would be good for code size to generate very long
# runs of conditional instructions, but we put an (arbitrary) limit on this to
# avoid generating code which is very bad for performance, and only saves a few
# bytes of code size.
#
# Shape of the input below: the fn4 diamond in ARM-mode opcodes, but with
# thirteen loads in the then block (the else block still has three). The
# expected output keeps the conditional branch directly after the compare.
# Unlike the other functions in this file, no size-limit trace line is matched
# for this one.

# CHECK-LABEL: name:            fn7
# CHECK:      CMPri
# CHECK-NEXT: Bcc

body:             |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    B %bb.3

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r1, $r3

    renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
    renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = MOVi 0, 14, $noreg, $noreg
    STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    BX_RET 14, $noreg, implicit $r0

---
name:            fn8
alignment:       1
tracksRegLiveness: true

# The first t2LDRi12 instruction in each branch is the same, so one copy of it
# will be removed, and it doesn't need to be predicated, keeping us under the 4
# instruction IT block limit.
#
# Shape of the input below: a diamond like fn1, but both the then block and
# the else block take r0 as a live-in and start with the identical load
# `$r0 = t2LDRi12 $r0, 0`. After that shared load the then block has one more
# load (offset 4) and the else block has three, i.e. four instructions left to
# predicate. The expected output has the shared load once, then the then-block
# load, then the else-block loads, then the first instruction of bb.3.

# CHECK-LABEL: name:            fn8
# CHECK:      t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2MOVi

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body:             |
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $r0, $r1, $r2, $r3

    t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.2, 11, killed $cpsr

  bb.1.if.then:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg
    t2B %bb.3, 14, $noreg

  bb.2.if.else:
    successors: %bb.3(0x80000000)
    liveins: $r0, $r3

    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg

  bb.3.if.end:
    liveins: $r0, $r3

    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
    t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0

---
name:            fn9
alignment:       2
tracksRegLiveness: true

# The INLINEASM_BR instructions aren't analyzable, but they are identical so we
# can still do diamond if-conversion. From a code-size POV, they are common
# instructions, so one will be removed, and they don't need an IT block slot.
#
# Shape of the input below: bb.0 compares r2 with 42 and conditionally
# branches to the else block (bb.3). The then block (one load) and the else
# block (three loads) both end with the same INLINEASM_BR, which targets the
# address-taken block bb.5, followed by the same return. Both list bb.5 as
# their only successor. That leaves four instructions to predicate; the
# expected output has the four loads (then-block load first) followed by a
# single INLINEASM_BR.

# CHECK-LABEL: name:            fn9
# CHECK:      tCMPi8
# CHECK-NEXT: tLDRi
# CHECK-NEXT: tLDRi
# CHECK-NEXT: tLDRi
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: INLINEASM_BR

# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=8, NumPredicatedInstructions=4, ExtraPredicateBytes=2)

body:             |
  bb.0.entry:
    successors: %bb.1(0x30000000), %bb.3(0x50000000)
    liveins: $r0, $r1, $r2

    tCMPi8 renamable $r2, 42, 14, $noreg, implicit-def $cpsr
    t2Bcc %bb.3, 1, killed $cpsr

  bb.1.if.then:
    successors:  %bb.5(0x7fffffff)
    liveins: $r0, $r2

    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
    INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
    tBX_RET 14, $noreg, implicit $r2

  bb.3.if.else:
    successors: %bb.5(0x7fffffff)
    liveins: $r1, $r2

    renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg
    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
    renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
    INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
    tBX_RET 14, $noreg, implicit $r2
    
  bb.5.lab1 (ir-block-address-taken %ir-block.lab1):
    liveins: $r0

    renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg
    tBX_RET 14, $noreg, implicit $r0
...