# RUN: llc -mtriple=thumbv8a-unknown-linux-gnueabi %s -o - -run-pass=if-converter -debug-only=if-converter | FileCheck %s
# RUN: llc -mtriple=thumbv7-unknown-linux-gnueabi %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s
# RUN: FileCheck %s < %t --check-prefix=DEBUG
# REQUIRES: asserts
# When optimising for size, we use a different set of heuristics for
# if-conversion, which take into account the size of the instructions, not the
# time taken to execute them. This is more complicated for Thumb, where it is
# also affected by selection of narrow branch instructions, insertion of IT
# instructions, and selection of the CB(N)Z instructions.
--- |
; IR stub for the MIR function fn1 later in this file. It carries the minsize
; attribute and declares the block names (entry, if.then, if.else, if.end) that
; the MIR basic blocks refer to; every block body is just 'unreachable'.
define void @fn1() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn2 later in this file. It carries the minsize
; attribute and declares the block names (entry, if.then, if.else, if.end) that
; the MIR basic blocks refer to; every block body is just 'unreachable'.
define void @fn2() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn3 later in this file. It carries the minsize
; attribute and declares the block names (entry, if.then, if.else, if.end) that
; the MIR basic blocks may refer to; every block body is just 'unreachable'.
define void @fn3() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn4 later in this file. Besides minsize it sets
; "target-features"="-thumb-mode"; the matching MIR body uses the non-Thumb
; opcodes (CMPri, LDRi12, ...). Every block body is just 'unreachable'.
define void @fn4() minsize "target-features"="-thumb-mode" {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn5 later in this file. It carries the minsize
; attribute and declares the block names (entry, if.then, if.else, if.end) that
; the MIR basic blocks may refer to; every block body is just 'unreachable'.
define void @fn5() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn6 later in this file. Unlike the other stubs
; it declares if2.then and if2.else instead of if.end; these are the two blocks
; that both arms of the MIR body branch to. Every block body is just
; 'unreachable'.
define void @fn6() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if2.then:
unreachable
if2.else:
unreachable
}
; IR stub for the MIR function fn7 later in this file. Besides minsize it sets
; "target-features"="-thumb-mode"; the matching MIR body uses the non-Thumb
; opcodes (CMPri, LDRi12, ...). Every block body is just 'unreachable'.
define void @fn7() minsize "target-features"="-thumb-mode" {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn8 later in this file. It carries the minsize
; attribute and declares the block names (entry, if.then, if.else, if.end) that
; the MIR basic blocks refer to; every block body is just 'unreachable'.
define void @fn8() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
if.end:
unreachable
}
; IR stub for the MIR function fn9 later in this file. It declares the block
; lab1, which the MIR body references through blockaddress(@fn9,
; %ir-block.lab1) and marks as ir-block-address-taken. Every block body is just
; 'unreachable'.
define void @fn9() minsize {
entry:
unreachable
if.then:
unreachable
if.else:
unreachable
lab1:
unreachable
}
...
---
name: fn1
alignment: 1
tracksRegLiveness: true
# Shape of the input: the entry block compares $r2 with 5 and conditionally
# branches to if.else; if.then holds one load plus an unconditional branch,
# if.else holds three loads and falls through, and both rejoin at if.end.
# If-conversion is profitable here because it will remove two branches of 2
# bytes each (assuming they can become narrow branches later), and will only
# add 2 bytes with the IT instruction.
# Expected output: the compare is followed directly by the four loads and then
# by the t2MOVi from if.end, i.e. neither branch is left. The debug trace is
# expected to report four predicated instructions and 2 extra predicate bytes.
# CHECK-LABEL: name: fn1
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2MOVi
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r3
t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 11, killed $cpsr
bb.1.if.then:
successors: %bb.3(0x80000000)
liveins: $r0, $r3
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
t2B %bb.3, 14, $noreg
bb.2.if.else:
successors: %bb.3(0x80000000)
liveins: $r1, $r3
renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
bb.3.if.end:
liveins: $r0, $r3
renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
---
name: fn2
alignment: 1
tracksRegLiveness: true
# Shape of the input: same diamond as fn1, except that if.then holds two loads
# instead of one, so the two arms together contain five instructions that
# would need predicating.
# If-conversion is not profitable here, because the 5 conditional instructions
# would require 2 IT instructions.
# Expected output: the compare is still immediately followed by the t2Bcc, i.e.
# the branch structure is left alone. The debug trace is expected to report
# five predicated instructions and 4 extra predicate bytes.
# CHECK-LABEL: name: fn2
# CHECK: t2CMPri
# CHECK-NEXT: t2Bcc
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4)
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r3
t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 11, killed $cpsr
bb.1.if.then:
successors: %bb.3(0x80000000)
liveins: $r0, $r3
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
t2B %bb.3, 14, $noreg
bb.2.if.else:
successors: %bb.3(0x80000000)
liveins: $r1, $r3
renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
bb.3.if.end:
liveins: $r0, $r3
renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
---
name: fn3
alignment: 1
tracksRegLiveness: true
# Shape of the input: there is no join block; if.then (one load) and if.else
# (three loads) each finish with their own tBX_RET.
# Here, the true and false blocks both end in a tBX_RET instruction. One of
# these will be removed, saving 2 bytes, and the remaining one isn't
# conditional, so doesn't push us over the limit of 4 instructions in an IT
# block.
# Expected output: the compare is followed directly by the four loads and a
# single tBX_RET. The debug trace is expected to report 2 common bytes and four
# predicated instructions.
# CHECK-LABEL: name: fn3
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: tBX_RET
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r3
t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 11, killed $cpsr
bb.1.if.then:
liveins: $r0, $r3
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
bb.2.if.else:
liveins: $r1, $r3
renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
---
name: fn4
alignment: 1
tracksRegLiveness: true
# Shape of the input: the fn2 diamond (two loads in if.then, three in if.else)
# written with the non-Thumb opcodes (CMPri, Bcc, LDRi12, LDRSH, B, MOVi,
# STRi12, BX_RET); the IR stub for fn4 disables thumb-mode.
# This is the same as fn2, but compiled for ARM, which doesn't need IT
# instructions, so if-conversion is profitable.
# Expected output: the compare is followed directly by all five loads and then
# by the MOVi from if.end. The debug trace is expected to report 8 branch bytes
# and 0 extra predicate bytes.
# CHECK-LABEL: name: fn4
# CHECK: CMPri
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRSH
# CHECK-NEXT: LDRi12
# CHECK-NEXT: LDRi12
# CHECK-NEXT: MOVi
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0)
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r3
CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
Bcc %bb.2, 11, killed $cpsr
bb.1.if.then:
successors: %bb.3(0x80000000)
liveins: $r0, $r3
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
B %bb.3
bb.2.if.else:
successors: %bb.3(0x80000000)
liveins: $r1, $r3
renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg
bb.3.if.end:
liveins: $r0, $r3
renamable $r1 = MOVi 0, 14, $noreg, $noreg
STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
BX_RET 14, $noreg, implicit $r0
---
name: fn5
alignment: 1
tracksRegLiveness: true
# Shape of the input: same layout as fn3 (two arms that each end in tBX_RET),
# but the entry block compares $r2 against 0 rather than 5.
# Here, the compare and conditional branch can be turned into a CB(N)Z
# instruction, so we don't want to if-convert.
# Expected output: both the compare and the t2Bcc are still present. The debug
# trace is expected to report 0 branch bytes for this case.
# CHECK-LABEL: name: fn5
# CHECK: t2CMPri
# CHECK: t2Bcc
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $r0, $r1, $r2
t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 1, killed $cpsr
bb.1.if.then:
liveins: $r0
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
bb.2.if.else:
liveins: $r1
renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
---
name: fn6
alignment: 1
tracksRegLiveness: true
# Shape of the input: if.then (three loads) and if.else (one load) both end
# with the same three-instruction tail: a compare of $r0 against 0, a t2Bcc to
# if2.then and a t2B to if2.else.
# This is a forked-diamond pattern, we recognise that the conditional branches
# at the ends of the true and false blocks are the same, and can be shared.
# Expected output: the first compare is followed directly by the four loads,
# then by a single copy of the second compare and its t2Bcc. The debug trace is
# expected to report 12 common bytes.
# CHECK-LABEL: name: fn6
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2CMPri
# CHECK-NEXT: t2Bcc
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $r0, $r1, $r2, $r3
t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 1, killed $cpsr
bb.1.if.then:
successors: %bb.3(0x30000000), %bb.4(0x50000000)
liveins: $r0, $r1, $r3
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.3.if2.then, 1, killed $cpsr
t2B %bb.4.if2.else, 14, $noreg
bb.2.if.else:
successors: %bb.3(0x30000000), %bb.4(0x50000000)
liveins: $r0, $r1, $r3
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.3.if2.then, 1, killed $cpsr
t2B %bb.4.if2.else, 14, $noreg
bb.3.if2.then:
liveins: $r0, $r1, $r3
t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
bb.4.if2.else:
liveins: $r0
tBX_RET 14, $noreg, implicit $r0
---
name: fn7
alignment: 1
tracksRegLiveness: true
# Shape of the input: the fn4 diamond in non-Thumb opcodes, but with thirteen
# chained loads in if.then (if.else still has three).
# When compiling for ARM, it would be good for code size to generate very long
# runs of conditional instructions, but we put an (arbitrary) limit on this to
# avoid generating code which is very bad for performance, and only saves a few
# bytes of code size.
# Expected output: the compare is still immediately followed by the Bcc. Unlike
# the other functions in this file, nothing is checked in the debug trace here.
# CHECK-LABEL: name: fn7
# CHECK: CMPri
# CHECK-NEXT: Bcc
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r3
CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
Bcc %bb.2, 11, killed $cpsr
bb.1.if.then:
successors: %bb.3(0x80000000)
liveins: $r0, $r3
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
B %bb.3
bb.2.if.else:
successors: %bb.3(0x80000000)
liveins: $r1, $r3
renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg
renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg
bb.3.if.end:
liveins: $r0, $r3
renamable $r1 = MOVi 0, 14, $noreg, $noreg
STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
BX_RET 14, $noreg, implicit $r0
---
name: fn8
alignment: 1
tracksRegLiveness: true
# Shape of the input: a diamond in which both arms start with the identical
# load from $r0 at offset 0; if.then then has one more load (offset 4) and
# if.else three more.
# The first t2LDRi12 instruction in each branch is the same, so one copy of it
# will be removed, and it doesn't need to be predicated, keeping us under the 4
# instruction IT block limit.
# Expected output: the compare is followed directly by five loads and then by
# the t2MOVi from if.end. The debug trace is expected to report 4 common bytes
# and four predicated instructions.
# CHECK-LABEL: name: fn8
# CHECK: t2CMPri
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRi12
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: t2MOVi
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $r0, $r1, $r2, $r3
t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 11, killed $cpsr
bb.1.if.then:
successors: %bb.3(0x80000000)
liveins: $r0, $r3
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg
t2B %bb.3, 14, $noreg
bb.2.if.else:
successors: %bb.3(0x80000000)
liveins: $r0, $r3
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
bb.3.if.end:
liveins: $r0, $r3
renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
---
name: fn9
alignment: 2
tracksRegLiveness: true
# Shape of the input: if.then (one load) and if.else (three loads) both end
# with the same INLINEASM_BR targeting lab1 followed by a tBX_RET; lab1 is the
# address-taken block bb.5. The block numbers used here are 0, 1, 3 and 5.
# The INLINEASM_BR instructions aren't analyzable, but they are identical so we
# can still do diamond if-conversion. From a code-size POV, they are common
# instructions, so one will be removed, and they don't need an IT block slot.
# Expected output: the compare is followed directly by the four loads and then
# by an INLINEASM_BR. The debug trace is expected to report 8 common bytes and
# four predicated instructions.
# CHECK-LABEL: name: fn9
# CHECK: tCMPi8
# CHECK-NEXT: tLDRi
# CHECK-NEXT: tLDRi
# CHECK-NEXT: tLDRi
# CHECK-NEXT: t2LDRSHi12
# CHECK-NEXT: INLINEASM_BR
# DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9'
# DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=8, NumPredicatedInstructions=4, ExtraPredicateBytes=2)
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.3(0x50000000)
liveins: $r0, $r1, $r2
tCMPi8 renamable $r2, 42, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.3, 1, killed $cpsr
bb.1.if.then:
successors: %bb.5(0x7fffffff)
liveins: $r0, $r2
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
tBX_RET 14, $noreg, implicit $r2
bb.3.if.else:
successors: %bb.5(0x7fffffff)
liveins: $r1, $r2
renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg
renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg
renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg
INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1)
tBX_RET 14, $noreg, implicit $r2
bb.5.lab1 (ir-block-address-taken %ir-block.lab1):
liveins: $r0
renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg
tBX_RET 14, $noreg, implicit $r0
...