# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=2 < %s | FileCheck %s
# Integer multiply-add chain through x0, seeded by a mul. The first two madds
# read x0 only as the addend; the last one also reads it as both multiplicands.
# Expected timeline: the second madd starts 1 cycle after the first, while the
# last one waits the full 2 cycles.
# LLVM-MCA-BEGIN madd
mul x0, x0, x0
madd x0, x1, x2, x0
madd x0, x1, x2, x0
madd x0, x0, x0, x0
# LLVM-MCA-END
# Widening multiply-add chain through x0, seeded by a mul. The first two
# smaddls read x0 only as the addend; the last one also reads w0 as both
# multiplicands.
# Expected timeline: the second smaddl starts 1 cycle after the first, while
# the last one waits the full 2 cycles.
# LLVM-MCA-BEGIN smaddl
mul x0, x0, x0
smaddl x0, w1, w2, x0
smaddl x0, w1, w2, x0
smaddl x0, w0, w0, x0
# LLVM-MCA-END
# Scalar FP chain through d0. An fadd and an fmul each feed an fmadd that reads
# d0 only as the addend; the final fmadd reads d0 as a multiplicand instead
# (its addend is d2).
# Expected timeline: an fmadd starts 1 cycle after the fmul and 2 cycles after
# a preceding fmadd, but waits the full latency after the fadd (2 cycles) and
# when d0 is a multiplicand (4 cycles).
# LLVM-MCA-BEGIN fmadd
fadd d0, d0, d0
fmadd d0, d1, d2, d0
fmul d0, d0, d0
fmadd d0, d1, d2, d0
fmadd d0, d1, d2, d0
fmadd d0, d0, d1, d2
# LLVM-MCA-END
# NEON absolute-difference-accumulate chain through v0, seeded by a mul. The
# first two sabas use v0 only as the accumulator; the last one also reads it
# as a difference operand.
# Expected timeline: the second saba starts 1 cycle after the first, while the
# last one waits the full 4 cycles.
# LLVM-MCA-BEGIN saba
mul v0.4s, v0.4s, v0.4s
saba v0.4s, v1.4s, v2.4s
saba v0.4s, v1.4s, v2.4s
saba v0.4s, v0.4s, v1.4s
# LLVM-MCA-END
# NEON dot-product chain through v0, seeded by a mul. The first two sdots use
# v0 only as the accumulator; the last one also reads it as a multiplicand.
# Expected timeline: the second sdot starts 1 cycle after the first, while the
# last one waits the full 3 cycles.
# LLVM-MCA-BEGIN sdot
mul v0.4s, v0.4s, v0.4s
sdot v0.4s, v1.16b, v2.16b
sdot v0.4s, v1.16b, v2.16b
sdot v0.4s, v0.16b, v1.16b
# LLVM-MCA-END
# NEON matrix multiply-accumulate chain through v0, seeded by a mul. The first
# two smmlas use v0 only as the accumulator; the last one also reads it as a
# multiplicand.
# Expected timeline: the second smmla starts 1 cycle after the first, while
# the last one waits the full 3 cycles.
# LLVM-MCA-BEGIN smmla
mul v0.4s, v0.4s, v0.4s
smmla v0.4s, v1.16b, v2.16b
smmla v0.4s, v1.16b, v2.16b
smmla v0.4s, v0.16b, v1.16b
# LLVM-MCA-END
# NEON multiply-accumulate chain through v0, seeded by a mul. The first two
# mlas use v0 only as the accumulator; the last one also reads it as a
# multiplicand.
# Expected timeline: the second mla starts 1 cycle after the first, while the
# last one waits the full 4 cycles.
# LLVM-MCA-BEGIN mla
mul v0.4s, v0.4s, v0.4s
mla v0.4s, v1.4s, v2.4s
mla v0.4s, v1.4s, v2.4s
mla v0.4s, v0.4s, v1.4s
# LLVM-MCA-END
# NEON saturating rounding doubling multiply-accumulate chain through v0,
# seeded by a mul. The first two sqrdmlahs use v0 only as the accumulator; the
# last one also reads it as a multiplicand.
# Expected timeline: the second sqrdmlah starts 2 cycles after the first,
# while the last one waits the full 4 cycles.
# LLVM-MCA-BEGIN sqrdmlah
mul v0.4s, v0.4s, v0.4s
sqrdmlah v0.4s, v1.4s, v2.4s
sqrdmlah v0.4s, v1.4s, v2.4s
sqrdmlah v0.4s, v0.4s, v1.4s
# LLVM-MCA-END
# NEON widening multiply-accumulate chain through v0, seeded by a mul. The
# first two smlal2s use v0 only as the accumulator; the last one also reads it
# as a multiplicand.
# Expected timeline: the second smlal2 starts 1 cycle after the first, while
# the last one waits the full 4 cycles.
# LLVM-MCA-BEGIN smlal2
mul v0.4s, v0.4s, v0.4s
smlal2 v0.4s, v1.8h, v2.8h
smlal2 v0.4s, v1.8h, v2.8h
smlal2 v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
# NEON pairwise add-accumulate chain through v0, seeded by a mul. The first
# two sadalps use v0 only as the accumulator; the last one also reads it as
# the pairwise-added source.
# Expected timeline: the second sadalp starts 1 cycle after the first, while
# the last one waits the full 4 cycles.
# LLVM-MCA-BEGIN sadalp
mul v0.4s, v0.4s, v0.4s
sadalp v0.2d, v1.4s
sadalp v0.2d, v1.4s
sadalp v0.2d, v0.4s
# LLVM-MCA-END
# NEON shift-right-accumulate chain through v0, seeded by a mul. The first two
# ssras use v0 only as the accumulator; the last one also reads it as the
# shifted source.
# Expected timeline: the second ssra starts 1 cycle after the first, while the
# last one waits the full 4 cycles.
# LLVM-MCA-BEGIN ssra
mul v0.4s, v0.4s, v0.4s
ssra v0.2d, v1.2d, #1
ssra v0.2d, v1.2d, #1
ssra v0.2d, v0.2d, #1
# LLVM-MCA-END
# NEON complex FP multiply-accumulate chain through v0, seeded by an fmul. The
# first two fcmlas use v0 only as the accumulator; the last one also reads it
# as a multiplicand.
# Expected timeline: the second fcmla starts 2 cycles after the first, while
# the last one waits the full 4 cycles.
# LLVM-MCA-BEGIN fcmla
fmul v0.4s, v0.4s, v0.4s
fcmla v0.2d, v1.2d, v2.2d, #90
fcmla v0.2d, v1.2d, v2.2d, #90
fcmla v0.2d, v0.2d, v1.2d, #90
# LLVM-MCA-END
# NEON FP chain through v0. An fmul and an fadd each feed an fmla that uses v0
# only as the accumulator; the final fmla also reads v0 as a multiplicand.
# Expected timeline: an fmla starts 1 cycle after the fmul and 2 cycles after
# a preceding fmla, but waits the full latency after the fadd (2 cycles) and
# when v0 is a multiplicand (4 cycles).
# LLVM-MCA-BEGIN fmla
fmul v0.2d, v0.2d, v0.2d
fmla v0.2d, v1.2d, v2.2d
fadd v0.2d, v0.2d, v0.2d
fmla v0.2d, v1.2d, v2.2d
fmla v0.2d, v1.2d, v2.2d
fmla v0.2d, v0.2d, v1.2d
# LLVM-MCA-END
# NEON widening FP chain through v0. An fmul and an fadd each feed an fmlal
# that uses v0 only as the accumulator; the final fmlal also reads v0 as a
# multiplicand.
# Expected timeline: an fmlal starts 2 cycles after a preceding fmlal, but
# waits the full latency after the fmul (3 cycles), after the fadd (2 cycles)
# and when v0 is a multiplicand (4 cycles).
# LLVM-MCA-BEGIN fmlal
fmul v0.2d, v0.2d, v0.2d
fmlal v0.4s, v1.4h, v2.4h
fadd v0.2d, v0.2d, v0.2d
fmlal v0.4s, v1.4h, v2.4h
fmlal v0.4s, v1.4h, v2.4h
fmlal v0.4s, v0.4h, v1.4h
# LLVM-MCA-END
# NEON BFloat16 dot-product chain through v0, seeded by an fmul. The first two
# bfdots use v0 only as the accumulator; the last one also reads it as a
# multiplicand.
# Expected timeline: the second bfdot starts 3 cycles after the first, while
# the last one waits the full 5 cycles.
# LLVM-MCA-BEGIN bfdot
fmul v0.2d, v0.2d, v0.2d
bfdot v0.4s, v1.8h, v2.8h
bfdot v0.4s, v1.8h, v2.8h
bfdot v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
# NEON BFloat16 matrix multiply-accumulate chain through v0, seeded by an
# fmul. The first two bfmmlas use v0 only as the accumulator; the last one
# also reads it as a multiplicand.
# Expected timeline: the second bfmmla starts 4 cycles after the first, while
# the last one waits the full 6 cycles.
# LLVM-MCA-BEGIN bfmmla
fmul v0.2d, v0.2d, v0.2d
bfmmla v0.4s, v1.8h, v2.8h
bfmmla v0.4s, v1.8h, v2.8h
bfmmla v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
# NEON BFloat16 widening multiply-accumulate chain through v0, seeded by an
# fmul. The first two bfmlalbs use v0 only as the accumulator; the last one
# also reads it as a multiplicand.
# Expected timeline: the second bfmlalb starts 2 cycles after the first, while
# the last one waits the full 5 cycles.
# LLVM-MCA-BEGIN bfmlalb
fmul v0.2d, v0.2d, v0.2d
bfmlalb v0.4s, v1.8h, v2.8h
bfmlalb v0.4s, v1.8h, v2.8h
bfmlalb v0.4s, v0.8h, v1.8h
# LLVM-MCA-END
# CRC chain through w0, seeded by a mul. The first two crc32bs read w0 only as
# the first source operand (data comes from w1); the last one reads w0 as both
# source operands.
# Expected timeline: the second crc32b starts 1 cycle after the first, while
# the last one waits the full 2 cycles.
# LLVM-MCA-BEGIN crc32b
mul w0, w0, w0
crc32b w0, w0, w1
crc32b w0, w0, w1
crc32b w0, w0, w0
# LLVM-MCA-END
# SVE counterpart of the saba chain: a mul produces z0, two sabas use z0 only
# as the accumulator, and the last saba also reads z0 as a difference operand.
# Presumably contrasts accumulator-only dependencies with a regular source
# dependency; confirm against the expected timeline for this region.
# LLVM-MCA-BEGIN Z saba
mul z0.d, z0.d, z0.d
saba z0.d, z1.d, z2.d
saba z0.d, z1.d, z2.d
saba z0.d, z0.d, z1.d
# LLVM-MCA-END
# SVE predicated (p0/m) sadalp chain: a mul produces z0, two sadalps use z0
# only as the accumulator, and the last one also reads z0 as the pairwise-added
# source.
# LLVM-MCA-BEGIN Z sadalp
mul z0.d, z0.d, z0.d
sadalp z0.d, p0/m, z1.s
sadalp z0.d, p0/m, z1.s
sadalp z0.d, p0/m, z0.s
# LLVM-MCA-END
# SVE ssra chain: a mul produces z0, two ssras use z0 only as the accumulator,
# and the last one also reads z0 as the shifted source.
# LLVM-MCA-BEGIN Z ssra
mul z0.d, z0.d, z0.d
ssra z0.d, z1.d, #1
ssra z0.d, z1.d, #1
ssra z0.d, z0.d, #1
# LLVM-MCA-END
# SVE complex dot-product chain with .s accumulators (.b sources): a mul
# produces z0, two cdots use z0 only as the accumulator, and the last one also
# reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z cdot.s
mul z0.d, z0.d, z0.d
cdot z0.s, z1.b, z2.b, #90
cdot z0.s, z1.b, z2.b, #90
cdot z0.s, z0.b, z1.b, #90
# LLVM-MCA-END
# SVE complex dot-product chain with .d accumulators (.h sources): a mul
# produces z0, two cdots use z0 only as the accumulator, and the last one also
# reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z cdot.d
mul z0.d, z0.d, z0.d
cdot z0.d, z1.h, z2.h, #90
cdot z0.d, z1.h, z2.h, #90
cdot z0.d, z0.h, z1.h, #90
# LLVM-MCA-END
# SVE complex integer multiply-accumulate chain on .b elements: a mul produces
# z0, two cmlas use z0 only as the accumulator, and the last one also reads z0
# as a multiplicand.
# LLVM-MCA-BEGIN Z cmla.b
mul z0.d, z0.d, z0.d
cmla z0.b, z1.b, z2.b, #90
cmla z0.b, z1.b, z2.b, #90
cmla z0.b, z0.b, z1.b, #90
# LLVM-MCA-END
# SVE complex integer multiply-accumulate chain on .d elements: a mul produces
# z0, two cmlas use z0 only as the accumulator, and the last one also reads z0
# as a multiplicand.
# LLVM-MCA-BEGIN Z cmla.d
mul z0.d, z0.d, z0.d
cmla z0.d, z1.d, z2.d, #90
cmla z0.d, z1.d, z2.d, #90
cmla z0.d, z0.d, z1.d, #90
# LLVM-MCA-END
# SVE dot-product chain with .s accumulators (.b sources): a mul produces z0,
# two sdots use z0 only as the accumulator, and the last one also reads z0 as
# a multiplicand.
# LLVM-MCA-BEGIN Z sdot.s
mul z0.d, z0.d, z0.d
sdot z0.s, z1.b, z2.b
sdot z0.s, z1.b, z2.b
sdot z0.s, z0.b, z1.b
# LLVM-MCA-END
# SVE indexed dot-product chain: a mul produces z0, two indexed sdots use z0
# only as the accumulator, and the last one also reads z0 as a multiplicand.
# NOTE(review): the region is labelled sudot but every instruction in it is an
# indexed sdot. Renaming the region or switching the mnemonic would require
# regenerating the expected output - confirm which was intended.
# LLVM-MCA-BEGIN Z sudot
mul z0.d, z0.d, z0.d
sdot z0.s, z1.b, z2.b[1]
sdot z0.s, z1.b, z2.b[1]
sdot z0.s, z0.b, z1.b[1]
# LLVM-MCA-END
# SVE dot-product chain with .d accumulators (.h sources): a mul produces z0,
# two sdots use z0 only as the accumulator, and the last one also reads z0 as
# a multiplicand.
# LLVM-MCA-BEGIN Z sdot.d
mul z0.d, z0.d, z0.d
sdot z0.d, z1.h, z2.h
sdot z0.d, z1.h, z2.h
sdot z0.d, z0.h, z1.h
# LLVM-MCA-END
# SVE matrix multiply-accumulate chain: a mul produces z0, two smmlas use z0
# only as the accumulator, and the last one also reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z smmla
mul z0.s, z0.s, z0.s
smmla z0.s, z1.b, z2.b
smmla z0.s, z1.b, z2.b
smmla z0.s, z0.b, z1.b
# LLVM-MCA-END
# SVE predicated (p0/m) multiply-accumulate chain on .b elements: a mul
# produces z0, two mlas use z0 only as the accumulator, and the last one also
# reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z mla.b
mul z0.d, z0.d, z0.d
mla z0.b, p0/m, z1.b, z2.b
mla z0.b, p0/m, z1.b, z2.b
mla z0.b, p0/m, z0.b, z1.b
# LLVM-MCA-END
# SVE predicated (p0/m) multiply-accumulate chain on .d elements: a mul
# produces z0, two mlas use z0 only as the accumulator, and the last one also
# reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z mla.d
mul z0.d, z0.d, z0.d
mla z0.d, p0/m, z1.d, z2.d
mla z0.d, p0/m, z1.d, z2.d
mla z0.d, p0/m, z0.d, z1.d
# LLVM-MCA-END
# SVE widening multiply-accumulate chain: a mul produces z0, two smlalbs use
# z0 only as the accumulator, and the last one also reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z smlalb
mul z0.d, z0.d, z0.d
smlalb z0.d, z1.s, z2.s
smlalb z0.d, z1.s, z2.s
smlalb z0.d, z0.s, z1.s
# LLVM-MCA-END
# SVE saturating doubling widening multiply-accumulate chain: a mul produces
# z0, two sqdmlalbs use z0 only as the accumulator, and the last one also reads
# z0 as a multiplicand.
# LLVM-MCA-BEGIN Z sqdmlalb
mul z0.d, z0.d, z0.d
sqdmlalb z0.d, z1.s, z2.s
sqdmlalb z0.d, z1.s, z2.s
sqdmlalb z0.d, z0.s, z1.s
# LLVM-MCA-END
# SVE sqrdmlah chain on .b elements: a mul produces z0, two sqrdmlahs use z0
# only as the accumulator, and the last one also reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z sqrdmlah.b
mul z0.d, z0.d, z0.d
sqrdmlah z0.b, z1.b, z2.b
sqrdmlah z0.b, z1.b, z2.b
sqrdmlah z0.b, z0.b, z1.b
# LLVM-MCA-END
# SVE sqrdmlah chain on .d elements: a mul produces z0, two sqrdmlahs use z0
# only as the accumulator, and the last one also reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z sqrdmlah.d
mul z0.d, z0.d, z0.d
sqrdmlah z0.d, z1.d, z2.d
sqrdmlah z0.d, z1.d, z2.d
sqrdmlah z0.d, z0.d, z1.d
# LLVM-MCA-END
# SVE predicated (p0/m) complex FP multiply-accumulate chain: an fmul produces
# z0, two fcmlas use z0 only as the accumulator, and the last one also reads
# z0 as a multiplicand.
# LLVM-MCA-BEGIN Z fcmla ZPmZZ
fmul z0.d, z0.d, z0.d
fcmla z0.d, p0/m, z1.d, z2.d, 90
fcmla z0.d, p0/m, z1.d, z2.d, 90
fcmla z0.d, p0/m, z0.d, z1.d, 90
# LLVM-MCA-END
# SVE indexed complex FP multiply-accumulate chain: an fmul produces z0, two
# indexed fcmlas use z0 only as the accumulator, and the last one also reads
# z0 as a multiplicand.
# LLVM-MCA-BEGIN Z fcmla ZZZI
fmul z0.d, z0.d, z0.d
fcmla z0.s, z1.s, z2.s[1], 90
fcmla z0.s, z1.s, z2.s[1], 90
fcmla z0.s, z0.s, z1.s[1], 90
# LLVM-MCA-END
# SVE predicated (p0/m) FP multiply-accumulate chain: an fmul produces z0, two
# fmlas use z0 only as the accumulator, and the last one also reads z0 as a
# multiplicand.
# LLVM-MCA-BEGIN Z fmla ZPmZZ
fmul z0.d, z0.d, z0.d
fmla z0.d, p0/m, z1.d, z2.d
fmla z0.d, p0/m, z1.d, z2.d
fmla z0.d, p0/m, z0.d, z1.d
# LLVM-MCA-END
# SVE indexed FP multiply-accumulate chain: an fmul produces z0, two indexed
# fmlas use z0 only as the accumulator, and the last one also reads z0 as a
# multiplicand.
# LLVM-MCA-BEGIN Z fmla ZZZI
fmul z0.d, z0.d, z0.d
fmla z0.d, z1.d, z2.d[1]
fmla z0.d, z1.d, z2.d[1]
fmla z0.d, z0.d, z1.d[1]
# LLVM-MCA-END
# SVE widening FP multiply-accumulate chain (unindexed form): an fmul produces
# z0, two fmlalbs use z0 only as the accumulator, and the last one also reads
# z0 as a multiplicand.
# LLVM-MCA-BEGIN Z fmlalb ZZZ
fmul z0.d, z0.d, z0.d
fmlalb z0.s, z1.h, z2.h
fmlalb z0.s, z1.h, z2.h
fmlalb z0.s, z0.h, z1.h
# LLVM-MCA-END
# SVE BFloat16 dot-product chain: an fmul produces z0, two bfdots use z0 only
# as the accumulator, and the last one also reads z0 as a multiplicand.
# LLVM-MCA-BEGIN Z bfdot
fmul z0.d, z0.d, z0.d
bfdot z0.s, z1.h, z2.h
bfdot z0.s, z1.h, z2.h
bfdot z0.s, z0.h, z1.h
# LLVM-MCA-END
# SVE BFloat16 matrix multiply-accumulate chain: an fmul produces z0, two
# bfmmlas use z0 only as the accumulator, and the last one also reads z0 as a
# multiplicand.
# LLVM-MCA-BEGIN Z bfmmla
fmul z0.d, z0.d, z0.d
bfmmla z0.s, z1.h, z2.h
bfmmla z0.s, z1.h, z2.h
bfmmla z0.s, z0.h, z1.h
# LLVM-MCA-END
# SVE BFloat16 widening multiply-accumulate chain: an fmul produces z0, two
# bfmlalbs use z0 only as the accumulator, and the last one also reads z0 as a
# multiplicand.
# NOTE(review): this region operates on z registers but reuses the name of the
# earlier NEON bfmlalb region and lacks the "Z " prefix carried by the other
# SVE regions. Renaming it would require regenerating the expected output.
# LLVM-MCA-BEGIN bfmlalb
fmul z0.d, z0.d, z0.d
bfmlalb z0.s, z1.h, z2.h
bfmlalb z0.s, z1.h, z2.h
bfmlalb z0.s, z0.h, z1.h
# LLVM-MCA-END
# CHECK: [0] Code Region - madd
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 703
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.57
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . .. mul x0, x0, x0
# CHECK-NEXT: [0,1] D==eeER . .. madd x0, x1, x2, x0
# CHECK-NEXT: [0,2] D===eeER . .. madd x0, x1, x2, x0
# CHECK-NEXT: [0,3] D=====eeER. .. madd x0, x0, x0, x0
# CHECK-NEXT: [1,0] D=======eeER .. mul x0, x0, x0
# CHECK-NEXT: [1,1] D=========eeER .. madd x0, x1, x2, x0
# CHECK-NEXT: [1,2] D==========eeER.. madd x0, x1, x2, x0
# CHECK-NEXT: [1,3] D============eeER madd x0, x0, x0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul x0, x0, x0
# CHECK-NEXT: 1. 2 6.5 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 2. 2 7.5 0.0 0.0 madd x0, x1, x2, x0
# CHECK-NEXT: 3. 2 9.5 0.0 0.0 madd x0, x0, x0, x0
# CHECK-NEXT: 2 7.0 0.1 0.0 <total>
# CHECK: [1] Code Region - smaddl
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 703
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.57
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . .. mul x0, x0, x0
# CHECK-NEXT: [0,1] D==eeER . .. smaddl x0, w1, w2, x0
# CHECK-NEXT: [0,2] D===eeER . .. smaddl x0, w1, w2, x0
# CHECK-NEXT: [0,3] D=====eeER. .. smaddl x0, w0, w0, x0
# CHECK-NEXT: [1,0] D=======eeER .. mul x0, x0, x0
# CHECK-NEXT: [1,1] D=========eeER .. smaddl x0, w1, w2, x0
# CHECK-NEXT: [1,2] D==========eeER.. smaddl x0, w1, w2, x0
# CHECK-NEXT: [1,3] D============eeER smaddl x0, w0, w0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul x0, x0, x0
# CHECK-NEXT: 1. 2 6.5 0.0 0.0 smaddl x0, w1, w2, x0
# CHECK-NEXT: 2. 2 7.5 0.0 0.0 smaddl x0, w1, w2, x0
# CHECK-NEXT: 3. 2 9.5 0.0 0.0 smaddl x0, w0, w0, x0
# CHECK-NEXT: 2 7.0 0.1 0.0 <total>
# CHECK: [2] Code Region - fmadd
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1703
# CHECK-NEXT: Total uOps: 600
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.35
# CHECK-NEXT: IPC: 0.35
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeER. . . . . . .. fadd d0, d0, d0
# CHECK-NEXT: [0,1] D==eeeeER . . . . . .. fmadd d0, d1, d2, d0
# CHECK-NEXT: [0,2] D======eeeER . . . . .. fmul d0, d0, d0
# CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmadd d0, d1, d2, d0
# CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmadd d0, d1, d2, d0
# CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmadd d0, d0, d1, d2
# CHECK-NEXT: [1,0] D=================eeER . . .. fadd d0, d0, d0
# CHECK-NEXT: [1,1] D===================eeeeER . .. fmadd d0, d1, d2, d0
# CHECK-NEXT: [1,2] D=======================eeeER . .. fmul d0, d0, d0
# CHECK-NEXT: [1,3] D========================eeeeER .. fmadd d0, d1, d2, d0
# CHECK-NEXT: [1,4] D==========================eeeeER .. fmadd d0, d1, d2, d0
# CHECK-NEXT: [1,5] D==============================eeeeER fmadd d0, d0, d1, d2
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 fadd d0, d0, d0
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fmadd d0, d1, d2, d0
# CHECK-NEXT: 2. 2 15.5 0.0 0.0 fmul d0, d0, d0
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmadd d0, d1, d2, d0
# CHECK-NEXT: 4. 2 18.5 0.0 0.0 fmadd d0, d1, d2, d0
# CHECK-NEXT: 5. 2 22.5 0.0 0.0 fmadd d0, d0, d1, d2
# CHECK-NEXT: 2 15.7 0.1 0.0 <total>
# CHECK: [3] Code Region - saba
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeeER . . . . saba v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [0,2] D=====eeeeER . . . . saba v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [0,3] D=========eeeeER . . . saba v0.4s, v0.4s, v1.4s
# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D=================eeeeER . . saba v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [1,2] D==================eeeeER. . saba v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [1,3] D======================eeeeER saba v0.4s, v0.4s, v1.4s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 saba v0.4s, v0.4s, v1.4s
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [4] Code Region - sdot
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1103
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 0.8
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234
# CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeER. . . . sdot v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [0,2] D=====eeeER . . . sdot v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [0,3] D========eeeER . . . sdot v0.4s, v0.16b, v1.16b
# CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D===============eeeER . sdot v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [1,2] D================eeeER . sdot v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [1,3] D===================eeeER sdot v0.4s, v0.16b, v1.16b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b
# CHECK-NEXT: 2. 2 11.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b
# CHECK-NEXT: 3. 2 14.5 0.0 0.0 sdot v0.4s, v0.16b, v1.16b
# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
# CHECK: [5] Code Region - smmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1103
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 0.8
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234
# CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeER. . . . smmla v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [0,2] D=====eeeER . . . smmla v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [0,3] D========eeeER . . . smmla v0.4s, v0.16b, v1.16b
# CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D===============eeeER . smmla v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [1,2] D================eeeER . smmla v0.4s, v1.16b, v2.16b
# CHECK-NEXT: [1,3] D===================eeeER smmla v0.4s, v0.16b, v1.16b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b
# CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b
# CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla v0.4s, v0.16b, v1.16b
# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
# CHECK: [6] Code Region - mla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeeER . . . . mla v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [0,2] D=====eeeeER . . . . mla v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [0,3] D=========eeeeER . . . mla v0.4s, v0.4s, v1.4s
# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D=================eeeeER . . mla v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [1,2] D==================eeeeER. . mla v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [1,3] D======================eeeeER mla v0.4s, v0.4s, v1.4s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 mla v0.4s, v0.4s, v1.4s
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [7] Code Region - sqrdmlah
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.29
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [0,2] D======eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [0,3] D==========eeeeER . . . sqrdmlah v0.4s, v0.4s, v1.4s
# CHECK-NEXT: [1,0] D==============eeeeER . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D==================eeeeER. . sqrdmlah v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [1,2] D====================eeeeER . sqrdmlah v0.4s, v1.4s, v2.4s
# CHECK-NEXT: [1,3] D========================eeeeER sqrdmlah v0.4s, v0.4s, v1.4s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sqrdmlah v0.4s, v0.4s, v1.4s
# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
# CHECK: [8] Code Region - smlal2
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,2] D=====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,3] D=========eeeeER . . . smlal2 v0.4s, v0.8h, v1.8h
# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D=================eeeeER . . smlal2 v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,2] D==================eeeeER. . smlal2 v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,3] D======================eeeeER smlal2 v0.4s, v0.8h, v1.8h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 smlal2 v0.4s, v0.8h, v1.8h
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [9] Code Region - sadalp
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeeER . . . . sadalp v0.2d, v1.4s
# CHECK-NEXT: [0,2] D=====eeeeER . . . . sadalp v0.2d, v1.4s
# CHECK-NEXT: [0,3] D=========eeeeER . . . sadalp v0.2d, v0.4s
# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D=================eeeeER . . sadalp v0.2d, v1.4s
# CHECK-NEXT: [1,2] D==================eeeeER. . sadalp v0.2d, v1.4s
# CHECK-NEXT: [1,3] D======================eeeeER sadalp v0.2d, v0.4s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 sadalp v0.2d, v1.4s
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 sadalp v0.2d, v1.4s
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 sadalp v0.2d, v0.4s
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [10] Code Region - ssra
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D====eeeeER . . . . ssra v0.2d, v1.2d, #1
# CHECK-NEXT: [0,2] D=====eeeeER . . . . ssra v0.2d, v1.2d, #1
# CHECK-NEXT: [0,3] D=========eeeeER . . . ssra v0.2d, v0.2d, #1
# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D=================eeeeER . . ssra v0.2d, v1.2d, #1
# CHECK-NEXT: [1,2] D==================eeeeER. . ssra v0.2d, v1.2d, #1
# CHECK-NEXT: [1,3] D======================eeeeER ssra v0.2d, v0.2d, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 ssra v0.2d, v1.2d, #1
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 ssra v0.2d, v1.2d, #1
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 ssra v0.2d, v0.2d, #1
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [11] Code Region - fcmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeER . . . . . fmul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [0,1] D===eeeeER. . . . . fcmla v0.2d, v1.2d, v2.2d, #90
# CHECK-NEXT: [0,2] D=====eeeeER . . . . fcmla v0.2d, v1.2d, v2.2d, #90
# CHECK-NEXT: [0,3] D=========eeeeER . . . fcmla v0.2d, v0.2d, v1.2d, #90
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: [1,1] D================eeeeER . . fcmla v0.2d, v1.2d, v2.2d, #90
# CHECK-NEXT: [1,2] D==================eeeeER. . fcmla v0.2d, v1.2d, v2.2d, #90
# CHECK-NEXT: [1,3] D======================eeeeER fcmla v0.2d, v0.2d, v1.2d, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul v0.4s, v0.4s, v0.4s
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fcmla v0.2d, v0.2d, v1.2d, #90
# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
# CHECK: [12] Code Region - fmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1703
# CHECK-NEXT: Total uOps: 600
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.35
# CHECK-NEXT: IPC: 0.35
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . .. fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,1] D=eeeeER . . . . . .. fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: [0,2] D=====eeER. . . . . .. fadd v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmla v0.2d, v0.2d, v1.2d
# CHECK-NEXT: [1,0] D=================eeeER . . .. fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,1] D==================eeeeER. . .. fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: [1,2] D======================eeER . .. fadd v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,3] D========================eeeeER .. fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: [1,4] D==========================eeeeER .. fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: [1,5] D==============================eeeeER fmla v0.2d, v0.2d, v1.2d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 9.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: 2. 2 14.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: 4. 2 18.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
# CHECK-NEXT: 5. 2 22.5 0.0 0.0 fmla v0.2d, v0.2d, v1.2d
# CHECK-NEXT: 2 15.3 0.1 0.0 <total>
# CHECK: [13] Code Region - fmlal
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 600
# CHECK-NEXT: Total Cycles: 1903
# CHECK-NEXT: Total uOps: 600
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.32
# CHECK-NEXT: IPC: 0.32
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0
# CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,1] D===eeeeER. . . . . . . fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: [0,2] D=======eeER . . . . . . fadd v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,3] D=========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: [0,4] D===========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: [0,5] D===============eeeeER . . . . fmlal v0.4s, v0.4h, v1.4h
# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,1] D======================eeeeER . . . fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: [1,2] D==========================eeER . . fadd v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,3] D============================eeeeER. . fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: [1,4] D==============================eeeeER . fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: [1,5] D==================================eeeeER fmlal v0.4s, v0.4h, v1.4h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1. 2 13.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: 2. 2 17.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 3. 2 19.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: 4. 2 21.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
# CHECK-NEXT: 5. 2 25.5 0.0 0.0 fmlal v0.4s, v0.4h, v1.4h
# CHECK-NEXT: 2 18.0 0.1 0.0 <total>
# CHECK: [14] Code Region - bfdot
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1603
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 01234
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot v0.4s, v0.8h, v1.8h
# CHECK-NEXT: [1,0] D================eeeER . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,3] D===========================eeeeeER bfdot v0.4s, v0.8h, v1.8h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot v0.4s, v0.8h, v1.8h
# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
# CHECK: [15] Code Region - bfmmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1903
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.21
# CHECK-NEXT: IPC: 0.21
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0
# CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla v0.4s, v0.8h, v1.8h
# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla v0.4s, v0.8h, v1.8h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla v0.4s, v0.8h, v1.8h
# CHECK-NEXT: 2 16.3 0.1 0.0 <total>
# CHECK: [16] Code Region - bfmlalb
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.27
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb v0.4s, v0.8h, v1.8h
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h
# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb v0.4s, v0.8h, v1.8h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h
# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb v0.4s, v0.8h, v1.8h
# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
# CHECK: [17] Code Region - crc32b
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 703
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.57
# CHECK-NEXT: IPC: 0.57
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . .. mul w0, w0, w0
# CHECK-NEXT: [0,1] D==eeER . .. crc32b w0, w0, w1
# CHECK-NEXT: [0,2] D===eeER . .. crc32b w0, w0, w1
# CHECK-NEXT: [0,3] D=====eeER. .. crc32b w0, w0, w0
# CHECK-NEXT: [1,0] D=======eeER .. mul w0, w0, w0
# CHECK-NEXT: [1,1] D=========eeER .. crc32b w0, w0, w1
# CHECK-NEXT: [1,2] D==========eeER.. crc32b w0, w0, w1
# CHECK-NEXT: [1,3] D============eeER crc32b w0, w0, w0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul w0, w0, w0
# CHECK-NEXT: 1. 2 6.5 0.0 0.0 crc32b w0, w0, w1
# CHECK-NEXT: 2. 2 7.5 0.0 0.0 crc32b w0, w0, w1
# CHECK-NEXT: 3. 2 9.5 0.0 0.0 crc32b w0, w0, w0
# CHECK-NEXT: 2 7.0 0.1 0.0 <total>
# CHECK: [18] Code Region - Z saba
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . saba z0.d, z1.d, z2.d
# CHECK-NEXT: [0,2] D======eeeeER . . . . saba z0.d, z1.d, z2.d
# CHECK-NEXT: [0,3] D==========eeeeER . . . saba z0.d, z0.d, z1.d
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . saba z0.d, z1.d, z2.d
# CHECK-NEXT: [1,2] D====================eeeeER . saba z0.d, z1.d, z2.d
# CHECK-NEXT: [1,3] D========================eeeeER saba z0.d, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 saba z0.d, z1.d, z2.d
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 saba z0.d, z1.d, z2.d
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 saba z0.d, z0.d, z1.d
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [19] Code Region - Z sadalp
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . sadalp z0.d, p0/m, z1.s
# CHECK-NEXT: [0,2] D======eeeeER . . . . sadalp z0.d, p0/m, z1.s
# CHECK-NEXT: [0,3] D==========eeeeER . . . sadalp z0.d, p0/m, z0.s
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . sadalp z0.d, p0/m, z1.s
# CHECK-NEXT: [1,2] D====================eeeeER . sadalp z0.d, p0/m, z1.s
# CHECK-NEXT: [1,3] D========================eeeeER sadalp z0.d, p0/m, z0.s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 sadalp z0.d, p0/m, z1.s
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sadalp z0.d, p0/m, z1.s
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sadalp z0.d, p0/m, z0.s
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [20] Code Region - Z ssra
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . ssra z0.d, z1.d, #1
# CHECK-NEXT: [0,2] D======eeeeER . . . . ssra z0.d, z1.d, #1
# CHECK-NEXT: [0,3] D==========eeeeER . . . ssra z0.d, z0.d, #1
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . ssra z0.d, z1.d, #1
# CHECK-NEXT: [1,2] D====================eeeeER . ssra z0.d, z1.d, #1
# CHECK-NEXT: [1,3] D========================eeeeER ssra z0.d, z0.d, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 ssra z0.d, z1.d, #1
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 ssra z0.d, z1.d, #1
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 ssra z0.d, z0.d, #1
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [21] Code Region - Z cdot.s
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.42
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456
# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeER . . .. cdot z0.s, z1.b, z2.b, #90
# CHECK-NEXT: [0,2] D======eeeER . . .. cdot z0.s, z1.b, z2.b, #90
# CHECK-NEXT: [0,3] D=========eeeER. . .. cdot z0.s, z0.b, z1.b, #90
# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D=================eeeER .. cdot z0.s, z1.b, z2.b, #90
# CHECK-NEXT: [1,2] D==================eeeER .. cdot z0.s, z1.b, z2.b, #90
# CHECK-NEXT: [1,3] D=====================eeeER cdot z0.s, z0.b, z1.b, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90
# CHECK-NEXT: 2. 2 13.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90
# CHECK-NEXT: 3. 2 16.0 0.0 0.0 cdot z0.s, z0.b, z1.b, #90
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [22] Code Region - Z cdot.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . cdot z0.d, z1.h, z2.h, #90
# CHECK-NEXT: [0,2] D======eeeeER . . . . cdot z0.d, z1.h, z2.h, #90
# CHECK-NEXT: [0,3] D==========eeeeER . . . cdot z0.d, z0.h, z1.h, #90
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . cdot z0.d, z1.h, z2.h, #90
# CHECK-NEXT: [1,2] D====================eeeeER . cdot z0.d, z1.h, z2.h, #90
# CHECK-NEXT: [1,3] D========================eeeeER cdot z0.d, z0.h, z1.h, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 cdot z0.d, z0.h, z1.h, #90
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [23] Code Region - Z cmla.b
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . cmla z0.b, z1.b, z2.b, #90
# CHECK-NEXT: [0,2] D======eeeeER . . . . cmla z0.b, z1.b, z2.b, #90
# CHECK-NEXT: [0,3] D==========eeeeER . . . cmla z0.b, z0.b, z1.b, #90
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . cmla z0.b, z1.b, z2.b, #90
# CHECK-NEXT: [1,2] D====================eeeeER . cmla z0.b, z1.b, z2.b, #90
# CHECK-NEXT: [1,3] D========================eeeeER cmla z0.b, z0.b, z1.b, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 cmla z0.b, z0.b, z1.b, #90
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [24] Code Region - Z cmla.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1803
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.28
# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012345678
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . cmla z0.d, z1.d, z2.d, #90
# CHECK-NEXT: [0,2] D========eeeeeER . . . . . cmla z0.d, z1.d, z2.d, #90
# CHECK-NEXT: [0,3] D=============eeeeeER . . . . cmla z0.d, z0.d, z1.d, #90
# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D=======================eeeeeER . . cmla z0.d, z1.d, z2.d, #90
# CHECK-NEXT: [1,2] D==========================eeeeeER . . cmla z0.d, z1.d, z2.d, #90
# CHECK-NEXT: [1,3] D===============================eeeeeER cmla z0.d, z0.d, z1.d, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 15.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90
# CHECK-NEXT: 2. 2 18.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90
# CHECK-NEXT: 3. 2 23.0 0.0 0.0 cmla z0.d, z0.d, z1.d, #90
# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
# CHECK: [25] Code Region - Z sdot.s
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.42
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456
# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b
# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b
# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b
# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b
# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b
# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b
# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b
# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [26] Code Region - Z sudot
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1203
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.42
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0123456
# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b[1]
# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b[1]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b[1]
# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
# CHECK: [27] Code Region - Z sdot.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . sdot z0.d, z1.h, z2.h
# CHECK-NEXT: [0,2] D======eeeeER . . . . sdot z0.d, z1.h, z2.h
# CHECK-NEXT: [0,3] D==========eeeeER . . . sdot z0.d, z0.h, z1.h
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . sdot z0.d, z1.h, z2.h
# CHECK-NEXT: [1,2] D====================eeeeER . sdot z0.d, z1.h, z2.h
# CHECK-NEXT: [1,3] D========================eeeeER sdot z0.d, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 sdot z0.d, z1.h, z2.h
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sdot z0.d, z1.h, z2.h
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sdot z0.d, z0.h, z1.h
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [28] Code Region - Z smmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1103
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.36
# CHECK-NEXT: Block RThroughput: 0.8
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01234
# CHECK: [0,0] DeeeeER . . . . mul z0.s, z0.s, z0.s
# CHECK-NEXT: [0,1] D====eeeER. . . . smmla z0.s, z1.b, z2.b
# CHECK-NEXT: [0,2] D=====eeeER . . . smmla z0.s, z1.b, z2.b
# CHECK-NEXT: [0,3] D========eeeER . . . smmla z0.s, z0.b, z1.b
# CHECK-NEXT: [1,0] D===========eeeeER . . mul z0.s, z0.s, z0.s
# CHECK-NEXT: [1,1] D===============eeeER . smmla z0.s, z1.b, z2.b
# CHECK-NEXT: [1,2] D================eeeER . smmla z0.s, z1.b, z2.b
# CHECK-NEXT: [1,3] D===================eeeER smmla z0.s, z0.b, z1.b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul z0.s, z0.s, z0.s
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla z0.s, z1.b, z2.b
# CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla z0.s, z1.b, z2.b
# CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla z0.s, z0.b, z1.b
# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
# CHECK: [29] Code Region - Z mla.b
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b
# CHECK-NEXT: [0,2] D======eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b
# CHECK-NEXT: [0,3] D==========eeeeER . . . mla z0.b, p0/m, z0.b, z1.b
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . mla z0.b, p0/m, z1.b, z2.b
# CHECK-NEXT: [1,2] D====================eeeeER . mla z0.b, p0/m, z1.b, z2.b
# CHECK-NEXT: [1,3] D========================eeeeER mla z0.b, p0/m, z0.b, z1.b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 mla z0.b, p0/m, z0.b, z1.b
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [30] Code Region - Z mla.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1803
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.28
# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012345678
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [0,2] D========eeeeeER . . . . . mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [0,3] D=============eeeeeER . . . . mla z0.d, p0/m, z0.d, z1.d
# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D=======================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D==========================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,3] D===============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 15.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 18.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 3. 2 23.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
# CHECK: [31] Code Region - Z smlalb
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1403
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.36
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . smlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [0,2] D======eeeeER . . . . smlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [0,3] D==========eeeeER . . . smlalb z0.d, z0.s, z1.s
# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeER . smlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [1,2] D====================eeeeER . smlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [1,3] D========================eeeeER smlalb z0.d, z0.s, z1.s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.0 0.0 0.0 smlalb z0.d, z1.s, z2.s
# CHECK-NEXT: 2. 2 14.0 0.0 0.0 smlalb z0.d, z1.s, z2.s
# CHECK-NEXT: 3. 2 18.0 0.0 0.0 smlalb z0.d, z0.s, z1.s
# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
# CHECK: [32] Code Region - Z sqdmlalb
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [0,3] D===========eeeeER . . . . sqdmlalb z0.d, z0.s, z1.s
# CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D====================eeeeER . . sqdmlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [1,2] D======================eeeeER . . sqdmlalb z0.d, z1.s, z2.s
# CHECK-NEXT: [1,3] D==========================eeeeER sqdmlalb z0.d, z0.s, z1.s
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s
# CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s
# CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqdmlalb z0.d, z0.s, z1.s
# CHECK-NEXT: 2 14.3 0.1 0.0 <total>
# CHECK: [33] Code Region - Z sqrdmlah.b
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 2.5
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b
# CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b
# CHECK-NEXT: [0,3] D===========eeeeER . . . . sqrdmlah z0.b, z0.b, z1.b
# CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D====================eeeeER . . sqrdmlah z0.b, z1.b, z2.b
# CHECK-NEXT: [1,2] D======================eeeeER . . sqrdmlah z0.b, z1.b, z2.b
# CHECK-NEXT: [1,3] D==========================eeeeER sqrdmlah z0.b, z0.b, z1.b
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b
# CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b
# CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqrdmlah z0.b, z0.b, z1.b
# CHECK-NEXT: 2 14.3 0.1 0.0 <total>
# CHECK: [34] Code Region - Z sqrdmlah.d
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1803
# CHECK-NEXT: Total uOps: 500
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.28
# CHECK-NEXT: IPC: 0.22
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012345678
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . sqrdmlah z0.d, z1.d, z2.d
# CHECK-NEXT: [0,2] D========eeeeeER . . . . . sqrdmlah z0.d, z1.d, z2.d
# CHECK-NEXT: [0,3] D=============eeeeeER . . . . sqrdmlah z0.d, z0.d, z1.d
# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D=======================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d
# CHECK-NEXT: [1,2] D==========================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d
# CHECK-NEXT: [1,3] D===============================eeeeeER sqrdmlah z0.d, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 15.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d
# CHECK-NEXT: 2. 2 18.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d
# CHECK-NEXT: 3. 2 23.0 0.0 0.0 sqrdmlah z0.d, z0.d, z1.d
# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
# CHECK: [35] Code Region - Z fcmla ZPmZZ
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.27
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.d, p0/m, z0.d, z1.d, #90
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.d, p0/m, z0.d, z1.d, #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.d, p0/m, z0.d, z1.d, #90
# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
# CHECK: [36] Code Region - Z fcmla ZZZI
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.27
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.s, z0.s, z1.s[1], #90
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.s, z0.s, z1.s[1], #90
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.s, z0.s, z1.s[1], #90
# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
# CHECK: [37] Code Region - Z fmla ZPmZZ
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, p0/m, z0.d, z1.d
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, p0/m, z0.d, z1.d
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, p0/m, z0.d, z1.d
# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
# CHECK: [38] Code Region - Z fmla ZZZI
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, z0.d, z1.d[1]
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, z0.d, z1.d[1]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, z0.d, z1.d[1]
# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
# CHECK: [39] Code Region - Z fmlalb ZZZ
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1303
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.31
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012345678
# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [0,3] D=========eeeeER . . . fmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D================eeeeER . . fmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D==================eeeeER. . fmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [1,3] D======================eeeeER fmlalb z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
# CHECK: [40] Code Region - Z bfdot
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1603
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.25
# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 01234
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D================eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: [1,3] D===========================eeeeeER bfdot z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot z0.s, z1.h, z2.h
# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot z0.s, z0.h, z1.h
# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
# CHECK: [41] Code Region - Z bfmmla
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1903
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.21
# CHECK-NEXT: IPC: 0.21
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0
# CHECK: [0,0] DeeeER . . . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h
# CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla z0.s, z0.h, z1.h
# CHECK-NEXT: 2 16.3 0.1 0.0 <total>
# CHECK: [42] Code Region - bfmlalb
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 1503
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 16
# CHECK-NEXT: uOps Per Cycle: 0.27
# CHECK-NEXT: IPC: 0.27
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 012
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb z0.s, z0.h, z1.h
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb z0.s, z0.h, z1.h
# CHECK-NEXT: 2 13.0 0.1 0.0 <total>