# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
--- |
source_filename = "skip-vpt-debug.c"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8.1m.main-arm-none-eabihf"
define hidden void @arm_max_no_idx_f32(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocapture %pResult) local_unnamed_addr #0 !dbg !13 {
entry:
call void @llvm.dbg.value(metadata ptr %pSrc, metadata !24, metadata !DIExpression()), !dbg !29
call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29
call void @llvm.dbg.value(metadata ptr %pResult, metadata !26, metadata !DIExpression()), !dbg !29
call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29
%cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30
br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31
vector.ph: ; preds = %entry
%n.rnd.up = add i32 %blockSize, 3, !dbg !31
%n.vec = and i32 %n.rnd.up, -4, !dbg !31
%0 = add i32 %n.vec, -4, !dbg !31
%1 = lshr i32 %0, 2, !dbg !31
%2 = add nuw nsw i32 %1, 1, !dbg !31
%3 = call i32 @llvm.start.loop.iterations.i32(i32 %2), !dbg !31
br label %vector.body, !dbg !31
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ]
%vec.phi = phi <4 x float> [ <float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000>, %vector.ph ], [ %10, %vector.body ]
%4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ]
%5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ]
%lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
%6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5)
%7 = sub i32 %5, 4
%wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34
%8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38
%9 = and <4 x i1> %6, %8, !dbg !40
%10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40
%scevgep = getelementptr float, ptr %lsr.iv1, i32 4
%11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1)
%12 = icmp ne i32 %11, 0
br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41
middle.block: ; preds = %vector.body
%13 = call nnan ninf nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> %10), !dbg !31
br label %while.end, !dbg !45
while.end: ; preds = %middle.block, %entry
%maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29
store float %maxValue.0.lcssa, ptr %pResult, align 4, !dbg !45, !tbaa !34
ret void, !dbg !46
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2
declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #3
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2
declare i32 @llvm.start.loop.iterations.i32(i32) #4
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #5
attributes #0 = { nofree norecurse nounwind optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-fp16fml,-hwdiv-arm,-i8mm,-sb,-sha2" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { nofree nosync nounwind readnone willreturn }
attributes #3 = { argmemonly nofree nosync nounwind readonly willreturn }
attributes #4 = { noduplicate nofree nosync nounwind willreturn }
attributes #5 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11}
!llvm.ident = !{!12}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "skip-vpt-debug.c", directory: "/home/vicspe01")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{i32 1, !"static_rwdata", i32 1}
!7 = !{i32 1, !"enumsize_buildattr", i32 2}
!8 = !{i32 1, !"armlib_unavailable", i32 0}
!9 = !{i32 8, !"branch-target-enforcement", i32 0}
!10 = !{i32 8, !"sign-return-address", i32 0}
!11 = !{i32 8, !"sign-return-address-all", i32 0}
!12 = !{!"Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]"}
!13 = distinct !DISubprogram(name: "arm_max_no_idx_f32", scope: !1, file: !1, line: 5, type: !14, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !23)
!14 = !DISubroutineType(types: !15)
!15 = !{null, !16, !20, !22}
!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 32)
!17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !18)
!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "float32_t", file: !1, line: 1, baseType: !19)
!19 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !1, line: 2, baseType: !21)
!21 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 32)
!23 = !{!24, !25, !26, !27, !28}
!24 = !DILocalVariable(name: "pSrc", arg: 1, scope: !13, file: !1, line: 5, type: !16)
!25 = !DILocalVariable(name: "blockSize", arg: 2, scope: !13, file: !1, line: 5, type: !20)
!26 = !DILocalVariable(name: "pResult", arg: 3, scope: !13, file: !1, line: 6, type: !22)
!27 = !DILocalVariable(name: "maxValue", scope: !13, file: !1, line: 7, type: !18)
!28 = !DILocalVariable(name: "newVal", scope: !13, file: !1, line: 8, type: !18)
!29 = !DILocation(line: 0, scope: !13)
!30 = !DILocation(line: 10, column: 20, scope: !13)
!31 = !DILocation(line: 10, column: 3, scope: !13)
!32 = !DILocation(line: 11, column: 14, scope: !33)
!33 = distinct !DILexicalBlock(scope: !13, file: !1, line: 10, column: 26)
!34 = !{!35, !35, i64 0}
!35 = !{!"float", !36, i64 0}
!36 = !{!"omnipotent char", !37, i64 0}
!37 = !{!"Simple C/C++ TBAA"}
!38 = !DILocation(line: 12, column: 18, scope: !39)
!39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 12, column: 9)
!40 = !DILocation(line: 12, column: 9, scope: !33)
!41 = distinct !{!41, !31, !42, !43, !44}
!42 = !DILocation(line: 15, column: 3, scope: !13)
!43 = !{!"llvm.loop.mustprogress"}
!44 = !{!"llvm.loop.isvectorized", i32 1}
!45 = !DILocation(line: 16, column: 12, scope: !13)
!46 = !DILocation(line: 17, column: 1, scope: !13)
...
---
name: arm_max_no_idx_f32
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
debugValueSubstitutions: []
constants:
- id: 0
value: float 0x3810000000000000
alignment: 4
isTargetSpecific: false
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: arm_max_no_idx_f32
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.1(0x50000000)
; CHECK-NEXT: liveins: $lr, $r0, $r1, $r2, $r7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
; CHECK-NEXT: tCBZ renamable $r1, %bb.4, debug-location !31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.vector.ph:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $r0, $r1, $r2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
; CHECK-NEXT: renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, $noreg, undef renamable $q0
; CHECK-NEXT: $lr = MVE_DLSTP_32 killed renamable $r1, debug-location !31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.vector.body (align 4):
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $lr, $q0, $r0, $r2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, $noreg, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34)
; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
; CHECK-NEXT: MVE_VPTv4f32 8, renamable $q1, renamable $q0, 12, implicit-def $vpr, debug-location !40
; CHECK-NEXT: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, $noreg, killed renamable $q0, debug-location !40
; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
; CHECK-NEXT: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.middle.block:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: liveins: $q0, $r2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31
; CHECK-NEXT: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit killed $q0, debug-location !31
; CHECK-NEXT: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31
; CHECK-NEXT: tB %bb.5, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: liveins: $r2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
; CHECK-NEXT: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5.while.end:
; CHECK-NEXT: liveins: $r2, $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
; CHECK-NEXT: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34)
; CHECK-NEXT: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6 (align 4):
; CHECK-NEXT: CONSTPOOL_ENTRY 0, %const.0, 4
bb.0.entry:
successors: %bb.4(0x30000000), %bb.1(0x50000000)
liveins: $r0, $r1, $r2, $r7, $lr
DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
tCBZ renamable $r1, %bb.4, debug-location !31
bb.1.vector.ph:
successors: %bb.2(0x80000000)
liveins: $r0, $r1, $r2
DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg, debug-location !31
renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg, debug-location !31
renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg, debug-location !31
renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg, debug-location !31
renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, $noreg, undef renamable $q0
renamable $lr = t2DoLoopStartTP killed renamable $r3, renamable $r1, debug-location !31
bb.2.vector.body (align 4):
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
liveins: $lr, $q0, $r0, $r1, $r2
DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg
MVE_VPST 2, implicit $vpr, debug-location !32
renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34)
DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, $noreg, debug-location !40
renamable $q0 = MVE_VORR killed renamable $q1, renamable $q1, 1, killed renamable $vpr, $noreg, killed renamable $q0, debug-location !40
renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr
tB %bb.3, 14 /* CC::al */, $noreg
bb.3.middle.block:
successors: %bb.5(0x80000000)
liveins: $q0, $r2
DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31
renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit $q0, debug-location !31
renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31
tB %bb.5, 14 /* CC::al */, $noreg
bb.4:
successors: %bb.5(0x80000000)
liveins: $r2
DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
bb.5.while.end:
liveins: $r2, $s0
DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34)
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46
bb.6 (align 4):
CONSTPOOL_ENTRY 0, %const.0, 4
...