# llvm/llvm/test/tools/llvm-mca/AArch64/Cortex/A510-writeback.s

# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a510 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s

# Region G01: ld1 (multiple structures, one register) using immediate
# post-index addressing, which writes the incremented address back to x27.
# Each load is followed by an add that reads x27, i.e. the written-back base.
# NOTE(review): presumably this pairing exposes the base-update latency in the
# timeline view; confirm against the Cortex-A510 scheduling model.
# LLVM-MCA-BEGIN G01
ld1  { v1.1d }, [x27], #8
add x0, x27, 1
ld1  { v1.2d }, [x27], #16
add x0, x27, 1
ld1  { v1.2s }, [x27], #8
add x0, x27, 1
ld1  { v1.4h }, [x27], #8
add x0, x27, 1
ld1  { v1.4s }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# Region G02: remaining one-register ld1 arrangements (.8b, .8h, .16b) with an
# immediate post-index, then the first register post-index forms, where the
# increment comes from x28 instead of an immediate.
# LLVM-MCA-BEGIN G02
ld1  { v1.8b }, [x27], #8
add x0, x27, 1
ld1  { v1.8h }, [x27], #16
add x0, x27, 1
ld1  { v1.16b }, [x27], #16
add x0, x27, 1
ld1  { v1.1d }, [x27], x28
add x0, x27, 1
ld1  { v1.2d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G03
ld1  { v1.2s }, [x27], x28
add x0, x27, 1
ld1  { v1.4h }, [x27], x28
add x0, x27, 1
ld1  { v1.4s }, [x27], x28
add x0, x27, 1
ld1  { v1.8b }, [x27], x28
add x0, x27, 1
ld1  { v1.8h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G04
ld1  { v1.16b }, [x27], x28
add x0, x27, 1
ld1  { v1.1d, v2.1d }, [x27], #16
add x0, x27, 1
ld1  { v1.2d, v2.2d }, [x27], #32
add x0, x27, 1
ld1  { v1.2s, v2.2s }, [x27], #16
add x0, x27, 1
ld1  { v1.4h, v2.4h }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G05
ld1  { v1.4s, v2.4s }, [x27], #32
add x0, x27, 1
ld1  { v1.8b, v2.8b }, [x27], #16
add x0, x27, 1
ld1  { v1.8h, v2.8h }, [x27], #32
add x0, x27, 1
ld1  { v1.16b, v2.16b }, [x27], #32
add x0, x27, 1
ld1  { v1.1d, v2.1d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G06
ld1  { v1.2d, v2.2d }, [x27], x28
add x0, x27, 1
ld1  { v1.2s, v2.2s }, [x27], x28
add x0, x27, 1
ld1  { v1.4h, v2.4h }, [x27], x28
add x0, x27, 1
ld1  { v1.4s, v2.4s }, [x27], x28
add x0, x27, 1
ld1  { v1.8b, v2.8b }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G07
ld1  { v1.8h, v2.8h }, [x27], x28
add x0, x27, 1
ld1  { v1.16b, v2.16b }, [x27], x28
add x0, x27, 1
ld1  { v1.1d, v2.1d, v3.1d }, [x27], #24
add x0, x27, 1
ld1  { v1.2d, v2.2d, v3.2d }, [x27], #48
add x0, x27, 1
ld1  { v1.2s, v2.2s, v3.2s }, [x27], #24
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G08
ld1  { v1.4h, v2.4h, v3.4h }, [x27], #24
add x0, x27, 1
ld1  { v1.4s, v2.4s, v3.4s }, [x27], #48
add x0, x27, 1
ld1  { v1.8b, v2.8b, v3.8b }, [x27], #24
add x0, x27, 1
ld1  { v1.8h, v2.8h, v3.8h }, [x27], #48
add x0, x27, 1
ld1  { v1.16b, v2.16b, v3.16b }, [x27], #48
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G09
ld1  { v1.1d, v2.1d, v3.1d }, [x27], x28
add x0, x27, 1
ld1  { v1.2d, v2.2d, v3.2d }, [x27], x28
add x0, x27, 1
ld1  { v1.2s, v2.2s, v3.2s }, [x27], x28
add x0, x27, 1
ld1  { v1.4h, v2.4h, v3.4h }, [x27], x28
add x0, x27, 1
ld1  { v1.4s, v2.4s, v3.4s }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G10
ld1  { v1.8b, v2.8b, v3.8b }, [x27], x28
add x0, x27, 1
ld1  { v1.8h, v2.8h, v3.8h }, [x27], x28
add x0, x27, 1
ld1  { v1.16b, v2.16b, v3.16b }, [x27], x28
add x0, x27, 1
ld1  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
add x0, x27, 1
ld1  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G11
ld1  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
add x0, x27, 1
ld1  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
add x0, x27, 1
ld1  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
add x0, x27, 1
ld1  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
add x0, x27, 1
ld1  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G12
ld1  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
add x0, x27, 1
ld1  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
add x0, x27, 1
ld1  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
add x0, x27, 1
ld1  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
add x0, x27, 1
ld1  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# Region G13: four-register ld1 forms with register post-index (x28), followed
# by a single-lane ld1 (byte lane 0) with immediate post-index #1.
# LLVM-MCA-BEGIN G13
ld1  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
add x0, x27, 1
ld1  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
add x0, x27, 1
ld1  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
add x0, x27, 1
ld1  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
add x0, x27, 1
ld1  { v1.b }[0], [x27], #1
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G14
ld1  { v1.b }[8], [x27], #1
add x0, x27, 1
ld1  { v1.b }[0], [x27], x28
add x0, x27, 1
ld1  { v1.b }[8], [x27], x28
add x0, x27, 1
ld1  { v1.h }[0], [x27], #2
add x0, x27, 1
ld1  { v1.h }[4], [x27], #2
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G15
ld1  { v1.h }[0], [x27], x28
add x0, x27, 1
ld1  { v1.h }[4], [x27], x28
add x0, x27, 1
ld1  { v1.s }[0], [x27], #4
add x0, x27, 1
ld1  { v1.s }[0], [x27], x28
add x0, x27, 1
ld1  { v1.d }[0], [x27], #8
add x0, x27, 1
# LLVM-MCA-END

# Region G16: one single-lane ld1 (doubleword lane 0, register post-index),
# then ld1r forms with immediate post-index. The ld1r immediates here
# (#8, #8, #4, #2) track the element size, not the full vector size.
# LLVM-MCA-BEGIN G16
ld1  { v1.d }[0], [x27], x28
add x0, x27, 1
ld1r  { v1.1d }, [x27], #8
add x0, x27, 1
ld1r  { v1.2d }, [x27], #8
add x0, x27, 1
ld1r  { v1.2s }, [x27], #4
add x0, x27, 1
ld1r  { v1.4h }, [x27], #2
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G17
ld1r  { v1.4s }, [x27], #4
add x0, x27, 1
ld1r  { v1.8b }, [x27], #1
add x0, x27, 1
ld1r  { v1.8h }, [x27], #2
add x0, x27, 1
ld1r  { v1.16b }, [x27], #1
add x0, x27, 1
ld1r  { v1.1d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G18
ld1r  { v1.2d }, [x27], x28
add x0, x27, 1
ld1r  { v1.2s }, [x27], x28
add x0, x27, 1
ld1r  { v1.4h }, [x27], x28
add x0, x27, 1
ld1r  { v1.4s }, [x27], x28
add x0, x27, 1
ld1r  { v1.8b }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# Region G19: two ld1r forms with register post-index (x28), then two-register
# ld2 forms with immediate post-index.
# LLVM-MCA-BEGIN G19
ld1r  { v1.8h }, [x27], x28
add x0, x27, 1
ld1r  { v1.16b }, [x27], x28
add x0, x27, 1
ld2  { v1.2d, v2.2d }, [x27], #32
add x0, x27, 1
ld2  { v1.2s, v2.2s }, [x27], #16
add x0, x27, 1
ld2  { v1.4h, v2.4h }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G20
ld2  { v1.4s, v2.4s }, [x27], #32
add x0, x27, 1
ld2  { v1.8b, v2.8b }, [x27], #16
add x0, x27, 1
ld2  { v1.8h, v2.8h }, [x27], #32
add x0, x27, 1
ld2  { v1.16b, v2.16b }, [x27], #32
add x0, x27, 1
ld2  { v1.2d, v2.2d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G21
ld2  { v1.2s, v2.2s }, [x27], x28
add x0, x27, 1
ld2  { v1.4h, v2.4h }, [x27], x28
add x0, x27, 1
ld2  { v1.4s, v2.4s }, [x27], x28
add x0, x27, 1
ld2  { v1.8b, v2.8b }, [x27], x28
add x0, x27, 1
ld2  { v1.8h, v2.8h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G22
ld2  { v1.16b, v2.16b }, [x27], x28
add x0, x27, 1
ld2  { v1.b, v2.b }[0], [x27], #2
add x0, x27, 1
ld2  { v1.b, v2.b }[8], [x27], #2
add x0, x27, 1
ld2  { v1.b, v2.b }[0], [x27], x28
add x0, x27, 1
ld2  { v1.b, v2.b }[8], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G23
ld2  { v1.h, v2.h }[0], [x27], #4
add x0, x27, 1
ld2  { v1.h, v2.h }[4], [x27], #4
add x0, x27, 1
ld2  { v1.h, v2.h }[0], [x27], x28
add x0, x27, 1
ld2  { v1.h, v2.h }[4], [x27], x28
add x0, x27, 1
ld2  { v1.s, v2.s }[0], [x27], #8
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G24
ld2  { v1.s, v2.s }[0], [x27], x28
add x0, x27, 1
ld2  { v1.d, v2.d }[0], [x27], #16
add x0, x27, 1
ld2  { v1.d, v2.d }[0], [x27], x28
add x0, x27, 1
ld2r  { v1.1d, v2.1d }, [x27], #16
add x0, x27, 1
ld2r  { v1.2d, v2.2d }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G25
ld2r  { v1.2s, v2.2s }, [x27], #8
add x0, x27, 1
ld2r  { v1.4h, v2.4h }, [x27], #4
add x0, x27, 1
ld2r  { v1.4s, v2.4s }, [x27], #8
add x0, x27, 1
ld2r  { v1.8b, v2.8b }, [x27], #2
add x0, x27, 1
ld2r  { v1.8h, v2.8h }, [x27], #4
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G26
ld2r  { v1.16b, v2.16b }, [x27], #2
add x0, x27, 1
ld2r  { v1.1d, v2.1d }, [x27], x28
add x0, x27, 1
ld2r  { v1.2d, v2.2d }, [x27], x28
add x0, x27, 1
ld2r  { v1.2s, v2.2s }, [x27], x28
add x0, x27, 1
ld2r  { v1.4h, v2.4h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G27
ld2r  { v1.4s, v2.4s }, [x27], x28
add x0, x27, 1
ld2r  { v1.8b, v2.8b }, [x27], x28
add x0, x27, 1
ld2r  { v1.8h, v2.8h }, [x27], x28
add x0, x27, 1
ld2r  { v1.16b, v2.16b }, [x27], x28
add x0, x27, 1
ld3  { v1.2d, v2.2d, v3.2d }, [x27], #48
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G28
ld3  { v1.2s, v2.2s, v3.2s }, [x27], #24
add x0, x27, 1
ld3  { v1.4h, v2.4h, v3.4h }, [x27], #24
add x0, x27, 1
ld3  { v1.4s, v2.4s, v3.4s }, [x27], #48
add x0, x27, 1
ld3  { v1.8b, v2.8b, v3.8b }, [x27], #24
add x0, x27, 1
ld3  { v1.8h, v2.8h, v3.8h }, [x27], #48
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G29
ld3  { v1.16b, v2.16b, v3.16b }, [x27], #48
add x0, x27, 1
ld3  { v1.2d, v2.2d, v3.2d }, [x27], x28
add x0, x27, 1
ld3  { v1.2s, v2.2s, v3.2s }, [x27], x28
add x0, x27, 1
ld3  { v1.4h, v2.4h, v3.4h }, [x27], x28
add x0, x27, 1
ld3  { v1.4s, v2.4s, v3.4s }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G30
ld3  { v1.8b, v2.8b, v3.8b }, [x27], x28
add x0, x27, 1
ld3  { v1.8h, v2.8h, v3.8h }, [x27], x28
add x0, x27, 1
ld3  { v1.16b, v2.16b, v3.16b }, [x27], x28
add x0, x27, 1
ld3  { v1.b, v2.b, v3.b }[0], [x27], #3
add x0, x27, 1
ld3  { v1.b, v2.b, v3.b }[8], [x27], #3
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G31
ld3  { v1.b, v2.b, v3.b }[0], [x27], x28
add x0, x27, 1
ld3  { v1.b, v2.b, v3.b }[8], [x27], x28
add x0, x27, 1
ld3  { v1.h, v2.h, v3.h }[0], [x27], #6
add x0, x27, 1
ld3  { v1.h, v2.h, v3.h }[4], [x27], #6
add x0, x27, 1
ld3  { v1.h, v2.h, v3.h }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G32
ld3  { v1.h, v2.h, v3.h }[4], [x27], x28
add x0, x27, 1
ld3  { v1.s, v2.s, v3.s }[0], [x27], #12
add x0, x27, 1
ld3  { v1.s, v2.s, v3.s }[0], [x27], x28
add x0, x27, 1
ld3  { v1.d, v2.d, v3.d }[0], [x27], #24
add x0, x27, 1
ld3  { v1.d, v2.d, v3.d }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G33
ld3r  { v1.1d, v2.1d, v3.1d }, [x27], #24
add x0, x27, 1
ld3r  { v1.2d, v2.2d, v3.2d }, [x27], #24
add x0, x27, 1
ld3r  { v1.2s, v2.2s, v3.2s }, [x27], #12
add x0, x27, 1
ld3r  { v1.4h, v2.4h, v3.4h }, [x27], #6
add x0, x27, 1
ld3r  { v1.4s, v2.4s, v3.4s }, [x27], #12
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G34
ld3r  { v1.8b, v2.8b, v3.8b }, [x27], #3
add x0, x27, 1
ld3r  { v1.8h, v2.8h, v3.8h }, [x27], #6
add x0, x27, 1
ld3r  { v1.16b, v2.16b, v3.16b }, [x27], #3
add x0, x27, 1
ld3r  { v1.1d, v2.1d, v3.1d }, [x27], x28
add x0, x27, 1
ld3r  { v1.2d, v2.2d, v3.2d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G35
ld3r  { v1.2s, v2.2s, v3.2s }, [x27], x28
add x0, x27, 1
ld3r  { v1.4h, v2.4h, v3.4h }, [x27], x28
add x0, x27, 1
ld3r  { v1.4s, v2.4s, v3.4s }, [x27], x28
add x0, x27, 1
ld3r  { v1.8b, v2.8b, v3.8b }, [x27], x28
add x0, x27, 1
ld3r  { v1.8h, v2.8h, v3.8h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G36
ld3r  { v1.16b, v2.16b, v3.16b }, [x27], x28
add x0, x27, 1
ld4  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
add x0, x27, 1
ld4  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
add x0, x27, 1
ld4  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
add x0, x27, 1
ld4  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G37
ld4  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
add x0, x27, 1
ld4  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
add x0, x27, 1
ld4  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
add x0, x27, 1
ld4  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
add x0, x27, 1
ld4  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G38
ld4  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
add x0, x27, 1
ld4  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
add x0, x27, 1
ld4  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
add x0, x27, 1
ld4  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
add x0, x27, 1
ld4  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G39
ld4  { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
add x0, x27, 1
ld4  { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
add x0, x27, 1
ld4  { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
add x0, x27, 1
ld4  { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
add x0, x27, 1
ld4  { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G40
ld4  { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
add x0, x27, 1
ld4  { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
add x0, x27, 1
ld4  { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
add x0, x27, 1
ld4  { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
add x0, x27, 1
ld4  { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G41
ld4  { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
add x0, x27, 1
ld4  { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
add x0, x27, 1
ld4r  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
add x0, x27, 1
ld4r  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
add x0, x27, 1
ld4r  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G42
ld4r  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
add x0, x27, 1
ld4r  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
add x0, x27, 1
ld4r  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
add x0, x27, 1
ld4r  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
add x0, x27, 1
ld4r  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G43
ld4r  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
add x0, x27, 1
ld4r  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
add x0, x27, 1
ld4r  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
add x0, x27, 1
ld4r  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
add x0, x27, 1
ld4r  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# Region G44: ld4r forms with register post-index (x28), then load-pair (ldp)
# of FP/SIMD scalar registers (s and d) with immediate post-index.
# LLVM-MCA-BEGIN G44
ld4r  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
add x0, x27, 1
ld4r  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
add x0, x27, 1
ld4r  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
add x0, x27, 1
ldp  s1, s2, [x27], #248
add x0, x27, 1
ldp  d1, d2, [x27], #496
add x0, x27, 1
# LLVM-MCA-END

# Region G45: ldp of q registers with post-index, then the pre-index forms
# ("[x27, #imm]!") for s, d and q pairs, and finally ldp of w registers with
# post-index. Both addressing forms write the updated address back to x27.
# LLVM-MCA-BEGIN G45
ldp  q1, q2, [x27], #992
add x0, x27, 1
ldp  s1, s2, [x27, #248]!
add x0, x27, 1
ldp  d1, d2, [x27, #496]!
add x0, x27, 1
ldp  q1, q2, [x27, #992]!
add x0, x27, 1
ldp  w1, w2, [x27], #248
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G46
ldp  x1, x2, [x27], #496
add x0, x27, 1
ldp  w1, w2, [x27, #248]!
add x0, x27, 1
ldp  x1, x2, [x27, #496]!
add x0, x27, 1
ldpsw  x1, x2, [x27], #248
add x0, x27, 1
ldpsw  x1, x2, [x27, #248]!
add x0, x27, 1
# LLVM-MCA-END

# Region G47: post-index ldr into FP/SIMD scalar registers of every width
# (b, h, s, d, q). The same offset, #254, is used for each width.
# LLVM-MCA-BEGIN G47
ldr  b1, [x27], #254
add x0, x27, 1
ldr  h1, [x27], #254
add x0, x27, 1
ldr  s1, [x27], #254
add x0, x27, 1
ldr  d1, [x27], #254
add x0, x27, 1
ldr  q1, [x27], #254
add x0, x27, 1
# LLVM-MCA-END

# Region G48: the same FP/SIMD scalar ldr widths (b, h, s, d, q) as the
# post-index region before it, here using pre-index addressing
# ("[x27, #254]!").
# LLVM-MCA-BEGIN G48
ldr  b1, [x27, #254]!
add x0, x27, 1
ldr  h1, [x27, #254]!
add x0, x27, 1
ldr  s1, [x27, #254]!
add x0, x27, 1
ldr  d1, [x27, #254]!
add x0, x27, 1
ldr  q1, [x27, #254]!
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G49
ldr  w1, [x27], #254
add x0, x27, 1
ldr  x1, [x27], #254
add x0, x27, 1
ldr  w1, [x27, #254]!
add x0, x27, 1
ldr  x1, [x27, #254]!
add x0, x27, 1
ldrb  w1, [x27], #254
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G50
ldrb  w1, [x27, #254]!
add x0, x27, 1
ldrh  w1, [x27], #254
add x0, x27, 1
ldrh  w1, [x27, #254]!
add x0, x27, 1
ldrsb  w1, [x27], #254
add x0, x27, 1
ldrsb  x1, [x27], #254
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G51
ldrsb  w1, [x27, #254]!
add x0, x27, 1
ldrsb  x1, [x27, #254]!
add x0, x27, 1
ldrsh  w1, [x27], #254
add x0, x27, 1
ldrsh  x1, [x27], #254
add x0, x27, 1
ldrsh  w1, [x27, #254]!
add x0, x27, 1
# LLVM-MCA-END

# Region G52: sign-extending loads (ldrsh pre-index, ldrsw post- and
# pre-index), followed by one-register st1 stores with immediate post-index.
# The stores keep the same shape: a writeback to x27, then an add reading x27.
# LLVM-MCA-BEGIN G52
ldrsh  x1, [x27, #254]!
add x0, x27, 1
ldrsw  x1, [x27], #254
add x0, x27, 1
ldrsw  x1, [x27, #254]!
add x0, x27, 1
st1  { v1.1d }, [x27], #8
add x0, x27, 1
st1  { v1.2d }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G53
st1  { v1.2s }, [x27], #8
add x0, x27, 1
st1  { v1.4h }, [x27], #8
add x0, x27, 1
st1  { v1.4s }, [x27], #16
add x0, x27, 1
st1  { v1.8b }, [x27], #8
add x0, x27, 1
st1  { v1.8h }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G54
st1  { v1.16b }, [x27], #16
add x0, x27, 1
st1  { v1.1d }, [x27], x28
add x0, x27, 1
st1  { v1.2d }, [x27], x28
add x0, x27, 1
st1  { v1.2s }, [x27], x28
add x0, x27, 1
st1  { v1.4h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G55
st1  { v1.4s }, [x27], x28
add x0, x27, 1
st1  { v1.8b }, [x27], x28
add x0, x27, 1
st1  { v1.8h }, [x27], x28
add x0, x27, 1
st1  { v1.16b }, [x27], x28
add x0, x27, 1
st1  { v1.1d, v2.1d }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G56
st1  { v1.2d, v2.2d }, [x27], #32
add x0, x27, 1
st1  { v1.2s, v2.2s }, [x27], #16
add x0, x27, 1
st1  { v1.4h, v2.4h }, [x27], #16
add x0, x27, 1
st1  { v1.4s, v2.4s }, [x27], #32
add x0, x27, 1
st1  { v1.8b, v2.8b }, [x27], #16
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G57
st1  { v1.8h, v2.8h }, [x27], #32
add x0, x27, 1
st1  { v1.16b, v2.16b }, [x27], #32
add x0, x27, 1
st1  { v1.1d, v2.1d }, [x27], x28
add x0, x27, 1
st1  { v1.2d, v2.2d }, [x27], x28
add x0, x27, 1
st1  { v1.2s, v2.2s }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G58
st1  { v1.4h, v2.4h }, [x27], x28
add x0, x27, 1
st1  { v1.4s, v2.4s }, [x27], x28
add x0, x27, 1
st1  { v1.8b, v2.8b }, [x27], x28
add x0, x27, 1
st1  { v1.8h, v2.8h }, [x27], x28
add x0, x27, 1
st1  { v1.16b, v2.16b }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G59
st1  { v1.1d, v2.1d, v3.1d }, [x27], #24
add x0, x27, 1
st1  { v1.2d, v2.2d, v3.2d }, [x27], #48
add x0, x27, 1
st1  { v1.2s, v2.2s, v3.2s }, [x27], #24
add x0, x27, 1
st1  { v1.4h, v2.4h, v3.4h }, [x27], #24
add x0, x27, 1
st1  { v1.4s, v2.4s, v3.4s }, [x27], #48
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G60
st1  { v1.8b, v2.8b, v3.8b }, [x27], #24
add x0, x27, 1
st1  { v1.8h, v2.8h, v3.8h }, [x27], #48
add x0, x27, 1
st1  { v1.16b, v2.16b, v3.16b }, [x27], #48
add x0, x27, 1
st1  { v1.1d, v2.1d, v3.1d }, [x27], x28
add x0, x27, 1
st1  { v1.2d, v2.2d, v3.2d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G61
st1  { v1.2s, v2.2s, v3.2s }, [x27], x28
add x0, x27, 1
st1  { v1.4h, v2.4h, v3.4h }, [x27], x28
add x0, x27, 1
st1  { v1.4s, v2.4s, v3.4s }, [x27], x28
add x0, x27, 1
st1  { v1.8b, v2.8b, v3.8b }, [x27], x28
add x0, x27, 1
st1  { v1.8h, v2.8h, v3.8h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G62
st1  { v1.16b, v2.16b, v3.16b }, [x27], x28
add x0, x27, 1
st1  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
add x0, x27, 1
st1  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
add x0, x27, 1
st1  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
add x0, x27, 1
st1  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G63
st1  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
add x0, x27, 1
st1  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
add x0, x27, 1
st1  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
add x0, x27, 1
st1  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
add x0, x27, 1
st1  { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G64
st1  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
add x0, x27, 1
st1  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
add x0, x27, 1
st1  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
add x0, x27, 1
st1  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
add x0, x27, 1
st1  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G65
st1  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
add x0, x27, 1
st1  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
add x0, x27, 1
st1  { v1.b }[0], [x27], #1
add x0, x27, 1
st1  { v1.b }[8], [x27], #1
add x0, x27, 1
st1  { v1.b }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G66
st1  { v1.b }[8], [x27], x28
add x0, x27, 1
st1  { v1.h }[0], [x27], #2
add x0, x27, 1
st1  { v1.h }[4], [x27], #2
add x0, x27, 1
st1  { v1.h }[0], [x27], x28
add x0, x27, 1
st1  { v1.h }[4], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G67
st1  { v1.s }[0], [x27], #4
add x0, x27, 1
st1  { v1.s }[0], [x27], x28
add x0, x27, 1
st1  { v1.d }[0], [x27], #8
add x0, x27, 1
st1  { v1.d }[0], [x27], x28
add x0, x27, 1
st2  { v1.2d, v2.2d }, [x27], #32
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G68
st2  { v1.2s, v2.2s }, [x27], #16
add x0, x27, 1
st2  { v1.4h, v2.4h }, [x27], #16
add x0, x27, 1
st2  { v1.4s, v2.4s }, [x27], #32
add x0, x27, 1
st2  { v1.8b, v2.8b }, [x27], #16
add x0, x27, 1
st2  { v1.8h, v2.8h }, [x27], #32
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G69
st2  { v1.16b, v2.16b }, [x27], #32
add x0, x27, 1
st2  { v1.2d, v2.2d }, [x27], x28
add x0, x27, 1
st2  { v1.2s, v2.2s }, [x27], x28
add x0, x27, 1
st2  { v1.4h, v2.4h }, [x27], x28
add x0, x27, 1
st2  { v1.4s, v2.4s }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G70
st2  { v1.8b, v2.8b }, [x27], x28
add x0, x27, 1
st2  { v1.8h, v2.8h }, [x27], x28
add x0, x27, 1
st2  { v1.16b, v2.16b }, [x27], x28
add x0, x27, 1
st2  { v1.b, v2.b }[0], [x27], #2
add x0, x27, 1
st2  { v1.b, v2.b }[8], [x27], #2
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G71
st2  { v1.b, v2.b }[0], [x27], x28
add x0, x27, 1
st2  { v1.b, v2.b }[8], [x27], x28
add x0, x27, 1
st2  { v1.h, v2.h }[0], [x27], #4
add x0, x27, 1
st2  { v1.h, v2.h }[4], [x27], #4
add x0, x27, 1
st2  { v1.h, v2.h }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G72
st2  { v1.h, v2.h }[4], [x27], x28
add x0, x27, 1
st2  { v1.s, v2.s }[0], [x27], #8
add x0, x27, 1
st2  { v1.s, v2.s }[0], [x27], x28
add x0, x27, 1
st2  { v1.d, v2.d }[0], [x27], #16
add x0, x27, 1
st2  { v1.d, v2.d }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# Region G73: three-register st3 stores with immediate post-index.
# This region holds three store/add pairs, unlike the five-pair regions on
# either side of it.
# NOTE(review): the expected output is generated per region (see the note at
# the top of the file), so regrouping instructions would require regenerating
# the assertions.
# LLVM-MCA-BEGIN G73
st3  { v1.2d, v2.2d, v3.2d }, [x27], #48
add x0, x27, 1
st3  { v1.2s, v2.2s, v3.2s }, [x27], #24
add x0, x27, 1
st3  { v1.4h, v2.4h, v3.4h }, [x27], #24
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G74
st3  { v1.4s, v2.4s, v3.4s }, [x27], #48
add x0, x27, 1
st3  { v1.8b, v2.8b, v3.8b }, [x27], #24
add x0, x27, 1
st3  { v1.8h, v2.8h, v3.8h }, [x27], #48
add x0, x27, 1
st3  { v1.16b, v2.16b, v3.16b }, [x27], #48
add x0, x27, 1
st3  { v1.2d, v2.2d, v3.2d }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G75
st3  { v1.2s, v2.2s, v3.2s }, [x27], x28
add x0, x27, 1
st3  { v1.4h, v2.4h, v3.4h }, [x27], x28
add x0, x27, 1
st3  { v1.4s, v2.4s, v3.4s }, [x27], x28
add x0, x27, 1
st3  { v1.8b, v2.8b, v3.8b }, [x27], x28
add x0, x27, 1
st3  { v1.8h, v2.8h, v3.8h }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G76
st3  { v1.16b, v2.16b, v3.16b }, [x27], x28
add x0, x27, 1
st3  { v1.b, v2.b, v3.b }[0], [x27], #3
add x0, x27, 1
st3  { v1.b, v2.b, v3.b }[8], [x27], #3
add x0, x27, 1
st3  { v1.b, v2.b, v3.b }[0], [x27], x28
add x0, x27, 1
st3  { v1.b, v2.b, v3.b }[8], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G77
st3  { v1.h, v2.h, v3.h }[0], [x27], #6
add x0, x27, 1
st3  { v1.h, v2.h, v3.h }[4], [x27], #6
add x0, x27, 1
st3  { v1.h, v2.h, v3.h }[0], [x27], x28
add x0, x27, 1
st3  { v1.h, v2.h, v3.h }[4], [x27], x28
add x0, x27, 1
st3  { v1.s, v2.s, v3.s }[0], [x27], #12
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G78
st3  { v1.s, v2.s, v3.s }[0], [x27], x28
add x0, x27, 1
st3  { v1.d, v2.d, v3.d }[0], [x27], #24
add x0, x27, 1
st3  { v1.d, v2.d, v3.d }[0], [x27], x28
add x0, x27, 1
st4  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
add x0, x27, 1
st4  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G79
st4  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
add x0, x27, 1
st4  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
add x0, x27, 1
st4  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
add x0, x27, 1
st4  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
add x0, x27, 1
st4  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G80
st4  { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
add x0, x27, 1
st4  { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
add x0, x27, 1
st4  { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
add x0, x27, 1
st4  { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
add x0, x27, 1
st4  { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G81
st4  { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
add x0, x27, 1
st4  { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
add x0, x27, 1
st4  { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
add x0, x27, 1
st4  { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
add x0, x27, 1
st4  { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G82
st4  { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
add x0, x27, 1
st4  { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
add x0, x27, 1
st4  { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
add x0, x27, 1
st4  { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
add x0, x27, 1
st4  { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# Region G83: single-lane st4 stores (word and doubleword lane 0), each with
# an immediate and a register (x28) post-index form. This region holds four
# store/add pairs.
# LLVM-MCA-BEGIN G83
st4  { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
add x0, x27, 1
st4  { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
add x0, x27, 1
st4  { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
add x0, x27, 1
st4  { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
add x0, x27, 1
# LLVM-MCA-END

# Region G84: store-pair (stp) of FP/SIMD scalar registers (s and d) with
# immediate post-index. This region holds two store/add pairs.
# LLVM-MCA-BEGIN G84
stp  s1, s2, [x27], #248
add x0, x27, 1
stp  d1, d2, [x27], #496
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G85
stp  q1, q2, [x27], #992
add x0, x27, 1
stp  s1, s2, [x27, #248]!
add x0, x27, 1
stp  d1, d2, [x27, #496]!
add x0, x27, 1
stp  q1, q2, [x27, #992]!
add x0, x27, 1
stp  w1, w2, [x27], #248
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G86
stp  x1, x2, [x27], #496
add x0, x27, 1
stp  w1, w2, [x27, #248]!
add x0, x27, 1
stp  x1, x2, [x27, #496]!
add x0, x27, 1
str  b1, [x27], #254
add x0, x27, 1
str  h1, [x27], #254
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G87
str  s1, [x27], #254
add x0, x27, 1
str  d1, [x27], #254
add x0, x27, 1
str  q1, [x27], #254
add x0, x27, 1
str  b1, [x27, #254]!
add x0, x27, 1
str  h1, [x27, #254]!
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G88
str  s1, [x27, #254]!
add x0, x27, 1
str  d1, [x27, #254]!
add x0, x27, 1
str  q1, [x27, #254]!
add x0, x27, 1
str  w1, [x27], #254
add x0, x27, 1
str  x1, [x27], #254
add x0, x27, 1
# LLVM-MCA-END

# LLVM-MCA-BEGIN G89
str  w1, [x27, #254]!
add x0, x27, 1
str  x1, [x27, #254]!
add x0, x27, 1
strb  w1, [x27], #254
add x0, x27, 1
strb  w1, [x27, #254]!
add x0, x27, 1
strh  w1, [x27], #254
add x0, x27, 1
# LLVM-MCA-END

# Region G90: a single pair, strh with pre-index addressing followed by the
# add that reads the written-back x27.
# LLVM-MCA-BEGIN G90
strh  w1, [x27, #254]!
add x0, x27, 1
# LLVM-MCA-END

# Region G91: two chained post-index loads. The first loads x1 through base
# x27; the second uses x1 (the value just loaded) as its base and writes the
# incremented address back to x1. Both trailing adds read x27, so the second
# add does not consume the base written back by the second load.
# NOTE(review): presumably this separates the loaded-data dependency (x1) from
# the base-writeback dependency (x27); confirm the intent with the test author.
# LLVM-MCA-BEGIN G91
ldr  x1, [x27], #254
add x0, x27, 1
ldr  x2, [x1], #254
add x0, x27, 1
# LLVM-MCA-END

# CHECK:      [0] Code Region - G01

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1701
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.88
# CHECK-NEXT: IPC:               0.59
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     01234567
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    . .   ld1	{ v1.1d }, [x27], #8
# CHECK-NEXT: [0,1]     . DE .    .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeeE  .    . .   ld1	{ v1.2d }, [x27], #16
# CHECK-NEXT: [0,3]     .    .DE  .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    . DeeE    . .   ld1	{ v1.2s }, [x27], #8
# CHECK-NEXT: [0,5]     .    .   DE    . .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    DeeE . .   ld1	{ v1.4h }, [x27], #8
# CHECK-NEXT: [0,7]     .    .    . DE . .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .  DeeeE   ld1	{ v1.4s }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.1d }, [x27], #8
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.2d }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.2s }, [x27], #8
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.4h }, [x27], #8
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.4s }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [1] Code Region - G02

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1801
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.83
# CHECK-NEXT: IPC:               0.56
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345678
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .  .   ld1	{ v1.8b }, [x27], #8
# CHECK-NEXT: [0,1]     . DE .    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeeE  .    .  .   ld1	{ v1.8h }, [x27], #16
# CHECK-NEXT: [0,3]     .    .DE  .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    . DeeeE   .  .   ld1	{ v1.16b }, [x27], #16
# CHECK-NEXT: [0,5]     .    .    DE   .  .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .DeeE.  .   ld1	{ v1.1d }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .  DE.  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .   DeeeE   ld1	{ v1.2d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    . DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.8b }, [x27], #8
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.8h }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.16b }, [x27], #16
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.1d }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.2d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [2] Code Region - G03

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1701
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.88
# CHECK-NEXT: IPC:               0.59
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     01234567
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    . .   ld1	{ v1.2s }, [x27], x28
# CHECK-NEXT: [0,1]     . DE .    .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    . .   ld1	{ v1.4h }, [x27], x28
# CHECK-NEXT: [0,3]     .    DE   .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeeE    . .   ld1	{ v1.4s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .   DE    . .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    DeeE . .   ld1	{ v1.8b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    . DE . .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .  DeeeE   ld1	{ v1.8h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.2s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.4h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.4s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.8b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.8h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [3] Code Region - G04

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.16b }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.16b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [4] Code Region - G05

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.1d, v2.1d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.1d, v2.1d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [5] Code Region - G06

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [6] Code Region - G07

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [7] Code Region - G08

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [8] Code Region - G09

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [9] Code Region - G10

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [10] Code Region - G11

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [11] Code Region - G12

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [12] Code Region - G13

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1901
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.79
# CHECK-NEXT: IPC:               0.53
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeeE.    .    .   .   ld1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .   .   ld1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .   .   ld1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE  .   ld1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeE   ld1	{ v1.b }[0], [x27], #1
# CHECK-NEXT: [0,9]     .    .    .    .  DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.b }[0], [x27], #1
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [13] Code Region - G14

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.00
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ld1	{ v1.b }[8], [x27], #1
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ld1	{ v1.b }[0], [x27], x28
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ld1	{ v1.b }[8], [x27], x28
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ld1	{ v1.h }[0], [x27], #2
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ld1	{ v1.h }[4], [x27], #2
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.b }[8], [x27], #1
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.b }[0], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.b }[8], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.h }[0], [x27], #2
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.h }[4], [x27], #2
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [14] Code Region - G15

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.00
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ld1	{ v1.h }[0], [x27], x28
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ld1	{ v1.h }[4], [x27], x28
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ld1	{ v1.s }[0], [x27], #4
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ld1	{ v1.s }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ld1	{ v1.d }[0], [x27], #8
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.h }[0], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1	{ v1.h }[4], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1	{ v1.s }[0], [x27], #4
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1	{ v1.s }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1	{ v1.d }[0], [x27], #8
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [15] Code Region - G16

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.00
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ld1	{ v1.d }[0], [x27], x28
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ld1r	{ v1.1d }, [x27], #8
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ld1r	{ v1.2d }, [x27], #8
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ld1r	{ v1.2s }, [x27], #4
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ld1r	{ v1.4h }, [x27], #2
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1	{ v1.d }[0], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1r	{ v1.1d }, [x27], #8
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1r	{ v1.2d }, [x27], #8
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1r	{ v1.2s }, [x27], #4
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1r	{ v1.4h }, [x27], #2
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [16] Code Region - G17

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.00
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ld1r	{ v1.4s }, [x27], #4
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ld1r	{ v1.8b }, [x27], #1
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ld1r	{ v1.8h }, [x27], #2
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ld1r	{ v1.16b }, [x27], #1
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ld1r	{ v1.1d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1r	{ v1.4s }, [x27], #4
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1r	{ v1.8b }, [x27], #1
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1r	{ v1.8h }, [x27], #2
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1r	{ v1.16b }, [x27], #1
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1r	{ v1.1d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [17] Code Region - G18

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.00
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ld1r	{ v1.2d }, [x27], x28
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ld1r	{ v1.2s }, [x27], x28
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ld1r	{ v1.4h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ld1r	{ v1.4s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ld1r	{ v1.8b }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1r	{ v1.2d }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1r	{ v1.2s }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld1r	{ v1.4h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld1r	{ v1.4s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld1r	{ v1.8b }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [18] Code Region - G19

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeE .    .    .    .   ld1r	{ v1.8h }, [x27], x28
# CHECK-NEXT: [0,1]     . DE .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .    .   ld1r	{ v1.16b }, [x27], x28
# CHECK-NEXT: [0,3]     .    DE   .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeeeeE  .    .   ld2	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld2	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld2	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld1r	{ v1.8h }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld1r	{ v1.16b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [19] Code Region - G20

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2801
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.54
# CHECK-NEXT: IPC:               0.36
# CHECK-NEXT: Block RThroughput: 9.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345678

# CHECK:      [0,0]     DeeeeeE   .    .    .    .  .   ld2	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,1]     .    DE   .    .    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    .DeeeE    .    .    .  .   ld2	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeeE   .    .  .   ld2	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .    DE   .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    .DeeeeeE  .  .   ld2	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    .    .DE  .  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    . DeeeeeE   ld2	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .    . DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [20] Code Region - G21

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2401
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.62
# CHECK-NEXT: IPC:               0.42
# CHECK-NEXT: Block RThroughput: 7.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          01234

# CHECK:      [0,0]     DeeeE.    .    .    .   .   ld2	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   .   ld2	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeeeE.    .   .   ld2	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .  DE.    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .   DeeeE .   .   ld2	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    . DE .   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .  DeeeeeE   ld2	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .  DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [21] Code Region - G22

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2201
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.68
# CHECK-NEXT: IPC:               0.45
# CHECK-NEXT: Block RThroughput: 6.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012

# CHECK:      [0,0]     DeeeeeE   .    .    . .   ld2	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,1]     .    DE   .    .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    .DeeeE    .    . .   ld2	{ v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: [0,3]     .    .   DE    .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeE.    . .   ld2	{ v1.b, v2.b }[8], [x27], #2
# CHECK-NEXT: [0,5]     .    .    .  DE.    . .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .   DeeeE . .   ld2	{ v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    . DE . .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .  DeeeE   ld2	{ v1.b, v2.b }[8], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2	{ v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2	{ v1.b, v2.b }[8], [x27], #2
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2	{ v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2	{ v1.b, v2.b }[8], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [22] Code Region - G23

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld2	{ v1.h, v2.h }[0], [x27], #4
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld2	{ v1.h, v2.h }[4], [x27], #4
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld2	{ v1.h, v2.h }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld2	{ v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld2	{ v1.s, v2.s }[0], [x27], #8
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2	{ v1.h, v2.h }[0], [x27], #4
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2	{ v1.h, v2.h }[4], [x27], #4
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2	{ v1.h, v2.h }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2	{ v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2	{ v1.s, v2.s }[0], [x27], #8
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [23] Code Region - G24

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld2	{ v1.s, v2.s }[0], [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld2	{ v1.d, v2.d }[0], [x27], #16
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld2	{ v1.d, v2.d }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld2r	{ v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld2r	{ v1.2d, v2.2d }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2	{ v1.s, v2.s }[0], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2	{ v1.d, v2.d }[0], [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2	{ v1.d, v2.d }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2r	{ v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2r	{ v1.2d, v2.2d }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [24] Code Region - G25

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld2r	{ v1.2s, v2.2s }, [x27], #8
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld2r	{ v1.4h, v2.4h }, [x27], #4
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld2r	{ v1.4s, v2.4s }, [x27], #8
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld2r	{ v1.8b, v2.8b }, [x27], #2
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld2r	{ v1.8h, v2.8h }, [x27], #4
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2r	{ v1.2s, v2.2s }, [x27], #8
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2r	{ v1.4h, v2.4h }, [x27], #4
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2r	{ v1.4s, v2.4s }, [x27], #8
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2r	{ v1.8b, v2.8b }, [x27], #2
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2r	{ v1.8h, v2.8h }, [x27], #4
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [25] Code Region - G26

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld2r	{ v1.16b, v2.16b }, [x27], #2
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld2r	{ v1.1d, v2.1d }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld2r	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld2r	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld2r	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2r	{ v1.16b, v2.16b }, [x27], #2
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2r	{ v1.1d, v2.1d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2r	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2r	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld2r	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [26] Code Region - G27

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2101
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.71
# CHECK-NEXT: IPC:               0.48
# CHECK-NEXT: Block RThroughput: 5.5

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          01

# CHECK:      [0,0]     DeeeE.    .    .    ..   ld2r	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    ..   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    ..   ld2r	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    ..   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    ..   ld2r	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    ..   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   ..   ld2r	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   ..   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeeE   ld3	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: [0,9]     .    .    .    .    DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld2r	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld2r	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld2r	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld2r	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [27] Code Region - G28

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 7.5

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   ld3	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   ld3	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   ld3	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   ld3	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   ld3	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [28] Code Region - G29

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 7.5

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   ld3	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   ld3	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   ld3	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   ld3	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   ld3	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [29] Code Region - G30

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2301
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.65
# CHECK-NEXT: IPC:               0.43
# CHECK-NEXT: Block RThroughput: 6.5

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0123

# CHECK:      [0,0]     DeeeeE    .    .    .  .   ld3	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .  .   ld3	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .  .   ld3	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeE.  .   ld3	{ v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: [0,7]     .    .    .    .  DE.  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .   DeeeE   ld3	{ v1.b, v2.b, v3.b }[8], [x27], #3
# CHECK-NEXT: [0,9]     .    .    .    .    . DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3	{ v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3	{ v1.b, v2.b, v3.b }[8], [x27], #3
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [30] Code Region - G31

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld3	{ v1.b, v2.b, v3.b }[0], [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld3	{ v1.b, v2.b, v3.b }[8], [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld3	{ v1.h, v2.h, v3.h }[0], [x27], #6
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld3	{ v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld3	{ v1.h, v2.h, v3.h }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3	{ v1.b, v2.b, v3.b }[0], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3	{ v1.b, v2.b, v3.b }[8], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3	{ v1.h, v2.h, v3.h }[0], [x27], #6
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3	{ v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3	{ v1.h, v2.h, v3.h }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [31] Code Region - G32

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld3	{ v1.h, v2.h, v3.h }[4], [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld3	{ v1.s, v2.s, v3.s }[0], [x27], #12
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld3	{ v1.s, v2.s, v3.s }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld3	{ v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld3	{ v1.d, v2.d, v3.d }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3	{ v1.h, v2.h, v3.h }[4], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3	{ v1.s, v2.s, v3.s }[0], [x27], #12
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3	{ v1.s, v2.s, v3.s }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3	{ v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3	{ v1.d, v2.d, v3.d }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [32] Code Region - G33

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld3r	{ v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld3r	{ v1.2d, v2.2d, v3.2d }, [x27], #24
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld3r	{ v1.2s, v2.2s, v3.2s }, [x27], #12
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld3r	{ v1.4h, v2.4h, v3.4h }, [x27], #6
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld3r	{ v1.4s, v2.4s, v3.4s }, [x27], #12
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3r	{ v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3r	{ v1.2d, v2.2d, v3.2d }, [x27], #24
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3r	{ v1.2s, v2.2s, v3.2s }, [x27], #12
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3r	{ v1.4h, v2.4h, v3.4h }, [x27], #6
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3r	{ v1.4s, v2.4s, v3.4s }, [x27], #12
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [33] Code Region - G34

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld3r	{ v1.8b, v2.8b, v3.8b }, [x27], #3
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld3r	{ v1.8h, v2.8h, v3.8h }, [x27], #6
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld3r	{ v1.16b, v2.16b, v3.16b }, [x27], #3
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld3r	{ v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld3r	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3r	{ v1.8b, v2.8b, v3.8b }, [x27], #3
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3r	{ v1.8h, v2.8h, v3.8h }, [x27], #6
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3r	{ v1.16b, v2.16b, v3.16b }, [x27], #3
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3r	{ v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3r	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [34] Code Region - G35

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld3r	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld3r	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld3r	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld3r	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld3r	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3r	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld3r	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld3r	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld3r	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld3r	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [35] Code Region - G36

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2801
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.54
# CHECK-NEXT: IPC:               0.36
# CHECK-NEXT: Block RThroughput: 9.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345678

# CHECK:      [0,0]     DeeeE.    .    .    .    .  .   ld3r	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeeeE    .    .    .  .   ld4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeeE   .    .  .   ld4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .    DE   .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    .DeeeeeE  .  .   ld4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    .    .DE  .  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    . DeeeeeE   ld4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,9]     .    .    .    .    .    . DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld3r	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [36] Code Region - G37

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      3001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.50
# CHECK-NEXT: IPC:               0.33
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789          0
# CHECK-NEXT: Index     0123456789          0123456789

# CHECK:      [0,0]     DeeeeeE   .    .    .    .    .   ld4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: [0,1]     .    DE   .    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    .DeeeeeE  .    .    .    .   ld4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: [0,3]     .    .    .DE  .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    . DeeeeeE .    .    .   ld4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: [0,5]     .    .    .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    .  DeeeeeE.    .   ld4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    .   DeeeeeE   ld4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [37] Code Region - G38

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      3001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.50
# CHECK-NEXT: IPC:               0.33
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789          0
# CHECK-NEXT: Index     0123456789          0123456789

# CHECK:      [0,0]     DeeeeeE   .    .    .    .    .   ld4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,1]     .    DE   .    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    .DeeeeeE  .    .    .    .   ld4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: [0,3]     .    .    .DE  .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    . DeeeeeE .    .    .   ld4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    .  DeeeeeE.    .   ld4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    .   DeeeeeE   ld4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [38] Code Region - G39

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [39] Code Region - G40

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [40] Code Region - G41

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld4r	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld4r	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld4r	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4r	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4r	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4r	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [41] Code Region - G42

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld4r	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld4r	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld4r	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld4r	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld4r	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4r	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4r	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4r	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4r	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4r	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [42] Code Region - G43

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   ld4r	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   ld4r	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   ld4r	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   ld4r	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   ld4r	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4r	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4r	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4r	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ld4r	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ld4r	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [43] Code Region - G44

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1801
# CHECK-NEXT: Total uOps:        1700

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.94
# CHECK-NEXT: IPC:               0.56
# CHECK-NEXT: Block RThroughput: 5.7

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345678
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeeE.    .    .  .   ld4r	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .  .   ld4r	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .  .   ld4r	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .  .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeE  .   ldp	s1, s2, [x27], #248
# CHECK-NEXT: [0,7]     .    .    .   DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    DeeE   ldp	d1, d2, [x27], #496
# CHECK-NEXT: [0,9]     .    .    .    . DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ld4r	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ld4r	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ld4r	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldp	s1, s2, [x27], #248
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldp	d1, d2, [x27], #496
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [44] Code Region - G45

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        2000

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.33
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 6.7

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ldp	q1, q2, [x27], #992
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ldp	s1, s2, [x27, #248]!
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ldp	d1, d2, [x27, #496]!
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ldp	q1, q2, [x27, #992]!
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ldp	w1, w2, [x27], #248
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldp	q1, q2, [x27], #992
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldp	s1, s2, [x27, #248]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldp	d1, d2, [x27, #496]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldp	q1, q2, [x27, #992]!
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldp	w1, w2, [x27], #248
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [45] Code Region - G46

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        2000

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.33
# CHECK-NEXT: IPC:               0.67
# CHECK-NEXT: Block RThroughput: 6.7

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeE .    .    .   ldp	x1, x2, [x27], #496
# CHECK-NEXT: [0,1]     . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .  DeeE   .    .   ldp	w1, w2, [x27, #248]!
# CHECK-NEXT: [0,3]     .    DE   .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .DeeE.    .   ldp	x1, x2, [x27, #496]!
# CHECK-NEXT: [0,5]     .    .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .   DeeE  .   ldpsw	x1, x2, [x27], #248
# CHECK-NEXT: [0,7]     .    .    .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    . DeeE   ldpsw	x1, x2, [x27, #248]!
# CHECK-NEXT: [0,9]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldp	x1, x2, [x27], #496
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldp	w1, w2, [x27, #248]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldp	x1, x2, [x27, #496]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldpsw	x1, x2, [x27], #248
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldpsw	x1, x2, [x27, #248]!
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [46] Code Region - G47

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.50
# CHECK-NEXT: IPC:               1.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeE  .    .   ldr	b1, [x27], #254
# CHECK-NEXT: [0,1]     .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE.    .   ldr	h1, [x27], #254
# CHECK-NEXT: [0,3]     .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .   DeE   .   ldr	s1, [x27], #254
# CHECK-NEXT: [0,5]     .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .DeE .   ldr	d1, [x27], #254
# CHECK-NEXT: [0,7]     .    . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .  DeE   ldr	q1, [x27], #254
# CHECK-NEXT: [0,9]     .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldr	b1, [x27], #254
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldr	h1, [x27], #254
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldr	s1, [x27], #254
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldr	d1, [x27], #254
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldr	q1, [x27], #254
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [47] Code Region - G48

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.50
# CHECK-NEXT: IPC:               1.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeE  .    .   ldr	b1, [x27, #254]!
# CHECK-NEXT: [0,1]     .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE.    .   ldr	h1, [x27, #254]!
# CHECK-NEXT: [0,3]     .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .   DeE   .   ldr	s1, [x27, #254]!
# CHECK-NEXT: [0,5]     .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .DeE .   ldr	d1, [x27, #254]!
# CHECK-NEXT: [0,7]     .    . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .  DeE   ldr	q1, [x27, #254]!
# CHECK-NEXT: [0,9]     .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldr	b1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldr	h1, [x27, #254]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldr	s1, [x27, #254]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldr	d1, [x27, #254]!
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldr	q1, [x27, #254]!
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [48] Code Region - G49

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.50
# CHECK-NEXT: IPC:               1.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeE  .    .   ldr	w1, [x27], #254
# CHECK-NEXT: [0,1]     .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE.    .   ldr	x1, [x27], #254
# CHECK-NEXT: [0,3]     .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .   DeE   .   ldr	w1, [x27, #254]!
# CHECK-NEXT: [0,5]     .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .DeE .   ldr	x1, [x27, #254]!
# CHECK-NEXT: [0,7]     .    . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .  DeE   ldrb	w1, [x27], #254
# CHECK-NEXT: [0,9]     .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldr	w1, [x27], #254
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldr	x1, [x27], #254
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldr	w1, [x27, #254]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldr	x1, [x27, #254]!
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldrb	w1, [x27], #254
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [49] Code Region - G50

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.50
# CHECK-NEXT: IPC:               1.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeE  .    .   ldrb	w1, [x27, #254]!
# CHECK-NEXT: [0,1]     .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE.    .   ldrh	w1, [x27], #254
# CHECK-NEXT: [0,3]     .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .   DeE   .   ldrh	w1, [x27, #254]!
# CHECK-NEXT: [0,5]     .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .DeE .   ldrsb	w1, [x27], #254
# CHECK-NEXT: [0,7]     .    . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .  DeE   ldrsb	x1, [x27], #254
# CHECK-NEXT: [0,9]     .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldrb	w1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldrh	w1, [x27], #254
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldrh	w1, [x27, #254]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldrsb	w1, [x27], #254
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldrsb	x1, [x27], #254
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [50] Code Region - G51

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.50
# CHECK-NEXT: IPC:               1.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeE  .    .   ldrsb	w1, [x27, #254]!
# CHECK-NEXT: [0,1]     .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE.    .   ldrsb	x1, [x27, #254]!
# CHECK-NEXT: [0,3]     .  DE.    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .   DeE   .   ldrsh	w1, [x27], #254
# CHECK-NEXT: [0,5]     .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .DeE .   ldrsh	x1, [x27], #254
# CHECK-NEXT: [0,7]     .    . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .  DeE   ldrsh	w1, [x27, #254]!
# CHECK-NEXT: [0,9]     .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldrsb	w1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldrsb	x1, [x27, #254]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldrsh	w1, [x27], #254
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       ldrsh	x1, [x27], #254
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       ldrsh	w1, [x27, #254]!
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [51] Code Region - G52

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      1401
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.07
# CHECK-NEXT: IPC:               0.71
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     01234
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeE  .    .   .   ldrsh	x1, [x27, #254]!
# CHECK-NEXT: [0,1]     .DE  .    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE.    .   .   ldrsw	x1, [x27], #254
# CHECK-NEXT: [0,3]     .  DE.    .   .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .   DeE   .   .   ldrsw	x1, [x27, #254]!
# CHECK-NEXT: [0,5]     .    DE   .   .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .DeeeE   .   st1	{ v1.1d }, [x27], #8
# CHECK-NEXT: [0,7]     .    .   DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    DeeeE   st1	{ v1.2d }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .  DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldrsh	x1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldrsw	x1, [x27], #254
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       ldrsw	x1, [x27, #254]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.1d }, [x27], #8
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.2d }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [52] Code Region - G53

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.2s }, [x27], #8
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.4h }, [x27], #8
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.4s }, [x27], #16
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.8b }, [x27], #8
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.8h }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.2s }, [x27], #8
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.4h }, [x27], #8
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.4s }, [x27], #16
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.8b }, [x27], #8
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.8h }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [53] Code Region - G54

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.16b }, [x27], #16
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.1d }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.2d }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.2s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.4h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.16b }, [x27], #16
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.1d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.2d }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.2s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.4h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [54] Code Region - G55

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.4s }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.8b }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.8h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.16b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.4s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.8b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.8h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.16b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.1d, v2.1d }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [55] Code Region - G56

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [56] Code Region - G57

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.1d, v2.1d }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.1d, v2.1d }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [57] Code Region - G58

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [58] Code Region - G59

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st1	{ v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st1	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st1	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st1	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st1	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.1d, v2.1d, v3.1d }, [x27], #24
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [59] Code Region - G60

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st1	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st1	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st1	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st1	{ v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st1	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.1d, v2.1d, v3.1d }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [60] Code Region - G61

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st1	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st1	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st1	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st1	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st1	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [61] Code Region - G62

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 18.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st1	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [62] Code Region - G63

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 20.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [63] Code Region - G64

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 20.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [64] Code Region - G65

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2201
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.68
# CHECK-NEXT: IPC:               0.45
# CHECK-NEXT: Block RThroughput: 11.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012

# CHECK:      [0,0]     DeeeeE    .    .    . .   st1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    . .   st1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    . .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeE.    . .   st1	{ v1.b }[0], [x27], #1
# CHECK-NEXT: [0,5]     .    .    .  DE.    . .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .   DeeeE . .   st1	{ v1.b }[8], [x27], #1
# CHECK-NEXT: [0,7]     .    .    .    . DE . .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .  DeeeE   st1	{ v1.b }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.b }[0], [x27], #1
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.b }[8], [x27], #1
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.b }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [65] Code Region - G66

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.75
# CHECK-NEXT: IPC:               0.50
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeE.    .    .    .   st1	{ v1.b }[8], [x27], x28
# CHECK-NEXT: [0,1]     .  DE.    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    .   st1	{ v1.h }[0], [x27], #2
# CHECK-NEXT: [0,3]     .    . DE .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    .   st1	{ v1.h }[4], [x27], #2
# CHECK-NEXT: [0,5]     .    .    .DE  .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   .   st1	{ v1.h }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeE   st1	{ v1.h }[4], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.b }[8], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.h }[0], [x27], #2
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.h }[4], [x27], #2
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.h }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st1	{ v1.h }[4], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [66] Code Region - G67

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2101
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.71
# CHECK-NEXT: IPC:               0.48
# CHECK-NEXT: Block RThroughput: 8.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          01

# CHECK:      [0,0]     DeeeE.    .    .    ..   st1	{ v1.s }[0], [x27], #4
# CHECK-NEXT: [0,1]     .  DE.    .    .    ..   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .   DeeeE .    .    ..   st1	{ v1.s }[0], [x27], x28
# CHECK-NEXT: [0,3]     .    . DE .    .    ..   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .  DeeeE  .    ..   st1	{ v1.d }[0], [x27], #8
# CHECK-NEXT: [0,5]     .    .    .DE  .    ..   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    . DeeeE   ..   st1	{ v1.d }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    DE   ..   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .DeeeeE   st2	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: [0,9]     .    .    .    .    DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st1	{ v1.s }[0], [x27], #4
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st1	{ v1.s }[0], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st1	{ v1.d }[0], [x27], #8
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st1	{ v1.d }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st2	{ v1.2d, v2.2d }, [x27], #32
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [67] Code Region - G68

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 14.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st2	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st2	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st2	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st2	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st2	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st2	{ v1.2s, v2.2s }, [x27], #16
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st2	{ v1.4h, v2.4h }, [x27], #16
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st2	{ v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st2	{ v1.8b, v2.8b }, [x27], #16
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st2	{ v1.8h, v2.8h }, [x27], #32
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [68] Code Region - G69

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 16.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st2	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st2	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st2	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st2	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st2	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st2	{ v1.16b, v2.16b }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st2	{ v1.2d, v2.2d }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st2	{ v1.2s, v2.2s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st2	{ v1.4h, v2.4h }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st2	{ v1.4s, v2.4s }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [69] Code Region - G70

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 14.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st2	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st2	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st2	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st2	{ v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st2	{ v1.b, v2.b }[8], [x27], #2
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st2	{ v1.8b, v2.8b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st2	{ v1.8h, v2.8h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st2	{ v1.16b, v2.16b }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st2	{ v1.b, v2.b }[0], [x27], #2
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st2	{ v1.b, v2.b }[8], [x27], #2
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [70] Code Region - G71

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st2	{ v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st2	{ v1.b, v2.b }[8], [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st2	{ v1.h, v2.h }[0], [x27], #4
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st2	{ v1.h, v2.h }[4], [x27], #4
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st2	{ v1.h, v2.h }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st2	{ v1.b, v2.b }[0], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st2	{ v1.b, v2.b }[8], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st2	{ v1.h, v2.h }[0], [x27], #4
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st2	{ v1.h, v2.h }[4], [x27], #4
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st2	{ v1.h, v2.h }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [71] Code Region - G72

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st2	{ v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st2	{ v1.s, v2.s }[0], [x27], #8
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st2	{ v1.s, v2.s }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st2	{ v1.d, v2.d }[0], [x27], #16
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st2	{ v1.d, v2.d }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st2	{ v1.h, v2.h }[4], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st2	{ v1.s, v2.s }[0], [x27], #8
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st2	{ v1.s, v2.s }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st2	{ v1.d, v2.d }[0], [x27], #16
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st2	{ v1.d, v2.d }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [72] Code Region - G73

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      600
# CHECK-NEXT: Total Cycles:      1501
# CHECK-NEXT: Total uOps:        900

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 12.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     012345
# CHECK-NEXT: Index     0123456789

# CHECK:      [0,0]     DeeeeE    .    .   st3	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: [0,1]     .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .   st3	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: [0,3]     .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE   st3	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: [0,5]     .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st3	{ v1.2d, v2.2d, v3.2d }, [x27], #48
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st3	{ v1.2s, v2.2s, v3.2s }, [x27], #24
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st3	{ v1.4h, v2.4h, v3.4h }, [x27], #24
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [73] Code Region - G74

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 20.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st3	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st3	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st3	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st3	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st3	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st3	{ v1.4s, v2.4s, v3.4s }, [x27], #48
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st3	{ v1.8b, v2.8b, v3.8b }, [x27], #24
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st3	{ v1.8h, v2.8h, v3.8h }, [x27], #48
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st3	{ v1.16b, v2.16b, v3.16b }, [x27], #48
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st3	{ v1.2d, v2.2d, v3.2d }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [74] Code Region - G75

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 20.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st3	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st3	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st3	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st3	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st3	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st3	{ v1.2s, v2.2s, v3.2s }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st3	{ v1.4h, v2.4h, v3.4h }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st3	{ v1.4s, v2.4s, v3.4s }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st3	{ v1.8b, v2.8b, v3.8b }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st3	{ v1.8h, v2.8h, v3.8h }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [75] Code Region - G76

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 12.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st3	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st3	{ v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st3	{ v1.b, v2.b, v3.b }[8], [x27], #3
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st3	{ v1.b, v2.b, v3.b }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st3	{ v1.b, v2.b, v3.b }[8], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st3	{ v1.16b, v2.16b, v3.16b }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st3	{ v1.b, v2.b, v3.b }[0], [x27], #3
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st3	{ v1.b, v2.b, v3.b }[8], [x27], #3
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st3	{ v1.b, v2.b, v3.b }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st3	{ v1.b, v2.b, v3.b }[8], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [76] Code Region - G77

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st3	{ v1.h, v2.h, v3.h }[0], [x27], #6
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st3	{ v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st3	{ v1.h, v2.h, v3.h }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st3	{ v1.h, v2.h, v3.h }[4], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st3	{ v1.s, v2.s, v3.s }[0], [x27], #12
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st3	{ v1.h, v2.h, v3.h }[0], [x27], #6
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st3	{ v1.h, v2.h, v3.h }[4], [x27], #6
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st3	{ v1.h, v2.h, v3.h }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st3	{ v1.h, v2.h, v3.h }[4], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st3	{ v1.s, v2.s, v3.s }[0], [x27], #12
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [77] Code Region - G78

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 14.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st3	{ v1.s, v2.s, v3.s }[0], [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st3	{ v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st3	{ v1.d, v2.d, v3.d }[0], [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st3	{ v1.s, v2.s, v3.s }[0], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st3	{ v1.d, v2.d, v3.d }[0], [x27], #24
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st3	{ v1.d, v2.d, v3.d }[0], [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [78] Code Region - G79

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 20.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [79] Code Region - G80

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 20.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st4	{ v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st4	{ v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st4	{ v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st4	{ v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st4	{ v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [80] Code Region - G81

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 14.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st4	{ v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st4	{ v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st4	{ v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [81] Code Region - G82

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      2501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 10.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          012345

# CHECK:      [0,0]     DeeeeE    .    .    .    .   st4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: [0,1]     .   DE    .    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .    .   st4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
# CHECK-NEXT: [0,3]     .    .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .    .   st4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
# CHECK-NEXT: [0,5]     .    .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE    .   st4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .    .    .    .    DeeeeE   st4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
# CHECK-NEXT: [0,9]     .    .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st4	{ v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st4	{ v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       st4	{ v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [82] Code Region - G83

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      800
# CHECK-NEXT: Total Cycles:      2001
# CHECK-NEXT: Total uOps:        1200

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    0.60
# CHECK-NEXT: IPC:               0.40
# CHECK-NEXT: Block RThroughput: 8.0

# CHECK:      Timeline view:
# CHECK-NEXT:                     0123456789
# CHECK-NEXT: Index     0123456789          0

# CHECK:      [0,0]     DeeeeE    .    .    .   st4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
# CHECK-NEXT: [0,1]     .   DE    .    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .    DeeeeE    .    .   st4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
# CHECK-NEXT: [0,3]     .    .   DE    .    .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     .    .    DeeeeE    .   st4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
# CHECK-NEXT: [0,5]     .    .    .   DE    .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .    .    .    DeeeeE   st4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
# CHECK-NEXT: [0,7]     .    .    .    .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       st4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       st4	{ v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       st4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       st4	{ v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [83] Code Region - G84

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      400
# CHECK-NEXT: Total Cycles:      201
# CHECK-NEXT: Total uOps:        600

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.99
# CHECK-NEXT: IPC:               1.99
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     012

# CHECK:      [0,0]     DE.   stp	s1, s2, [x27], #248
# CHECK-NEXT: [0,1]     DE.   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .DE   stp	d1, d2, [x27], #496
# CHECK-NEXT: [0,3]     .DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       stp	s1, s2, [x27], #248
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       stp	d1, d2, [x27], #496
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [84] Code Region - G85

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.99
# CHECK-NEXT: IPC:               2.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     012345

# CHECK:      [0,0]     DE   .   stp	q1, q2, [x27], #992
# CHECK-NEXT: [0,1]     DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .DE  .   stp	s1, s2, [x27, #248]!
# CHECK-NEXT: [0,3]     .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     . DE .   stp	d1, d2, [x27, #496]!
# CHECK-NEXT: [0,5]     . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .  DE.   stp	q1, q2, [x27, #992]!
# CHECK-NEXT: [0,7]     .  DE.   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .   DE   stp	w1, w2, [x27], #248
# CHECK-NEXT: [0,9]     .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       stp	q1, q2, [x27], #992
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       stp	s1, s2, [x27, #248]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       stp	d1, d2, [x27, #496]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       stp	q1, q2, [x27, #992]!
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       stp	w1, w2, [x27], #248
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [85] Code Region - G86

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.99
# CHECK-NEXT: IPC:               2.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     012345

# CHECK:      [0,0]     DE   .   stp	x1, x2, [x27], #496
# CHECK-NEXT: [0,1]     DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .DE  .   stp	w1, w2, [x27, #248]!
# CHECK-NEXT: [0,3]     .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     . DE .   stp	x1, x2, [x27, #496]!
# CHECK-NEXT: [0,5]     . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .  DE.   str	b1, [x27], #254
# CHECK-NEXT: [0,7]     .  DE.   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .   DE   str	h1, [x27], #254
# CHECK-NEXT: [0,9]     .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       stp	x1, x2, [x27], #496
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       stp	w1, w2, [x27, #248]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       stp	x1, x2, [x27, #496]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       str	b1, [x27], #254
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       str	h1, [x27], #254
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [86] Code Region - G87

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.99
# CHECK-NEXT: IPC:               2.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     012345

# CHECK:      [0,0]     DE   .   str	s1, [x27], #254
# CHECK-NEXT: [0,1]     DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .DE  .   str	d1, [x27], #254
# CHECK-NEXT: [0,3]     .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     . DE .   str	q1, [x27], #254
# CHECK-NEXT: [0,5]     . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .  DE.   str	b1, [x27, #254]!
# CHECK-NEXT: [0,7]     .  DE.   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .   DE   str	h1, [x27, #254]!
# CHECK-NEXT: [0,9]     .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       str	s1, [x27], #254
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       str	d1, [x27], #254
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       str	q1, [x27], #254
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       str	b1, [x27, #254]!
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       str	h1, [x27, #254]!
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [87] Code Region - G88

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.99
# CHECK-NEXT: IPC:               2.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     012345

# CHECK:      [0,0]     DE   .   str	s1, [x27, #254]!
# CHECK-NEXT: [0,1]     DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .DE  .   str	d1, [x27, #254]!
# CHECK-NEXT: [0,3]     .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     . DE .   str	q1, [x27, #254]!
# CHECK-NEXT: [0,5]     . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .  DE.   str	w1, [x27], #254
# CHECK-NEXT: [0,7]     .  DE.   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .   DE   str	x1, [x27], #254
# CHECK-NEXT: [0,9]     .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       str	s1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       str	d1, [x27, #254]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       str	q1, [x27, #254]!
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       str	w1, [x27], #254
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       str	x1, [x27], #254
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [88] Code Region - G89

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      1000
# CHECK-NEXT: Total Cycles:      501
# CHECK-NEXT: Total uOps:        1500

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.99
# CHECK-NEXT: IPC:               2.00
# CHECK-NEXT: Block RThroughput: 5.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     012345

# CHECK:      [0,0]     DE   .   str	w1, [x27, #254]!
# CHECK-NEXT: [0,1]     DE   .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     .DE  .   str	x1, [x27, #254]!
# CHECK-NEXT: [0,3]     .DE  .   add	x0, x27, #1
# CHECK-NEXT: [0,4]     . DE .   strb	w1, [x27], #254
# CHECK-NEXT: [0,5]     . DE .   add	x0, x27, #1
# CHECK-NEXT: [0,6]     .  DE.   strb	w1, [x27, #254]!
# CHECK-NEXT: [0,7]     .  DE.   add	x0, x27, #1
# CHECK-NEXT: [0,8]     .   DE   strh	w1, [x27], #254
# CHECK-NEXT: [0,9]     .   DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       str	w1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       str	x1, [x27, #254]!
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 4.     1     0.0    0.0    0.0       strb	w1, [x27], #254
# CHECK-NEXT: 5.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 6.     1     0.0    0.0    0.0       strb	w1, [x27, #254]!
# CHECK-NEXT: 7.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 8.     1     0.0    0.0    0.0       strh	w1, [x27], #254
# CHECK-NEXT: 9.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [89] Code Region - G90

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      200
# CHECK-NEXT: Total Cycles:      101
# CHECK-NEXT: Total uOps:        300

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    2.97
# CHECK-NEXT: IPC:               1.98
# CHECK-NEXT: Block RThroughput: 1.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     01

# CHECK:      [0,0]     DE   strh	w1, [x27, #254]!
# CHECK-NEXT: [0,1]     DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       strh	w1, [x27, #254]!
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

# CHECK:      [90] Code Region - G91

# CHECK:      Iterations:        100
# CHECK-NEXT: Instructions:      400
# CHECK-NEXT: Total Cycles:      401
# CHECK-NEXT: Total uOps:        600

# CHECK:      Dispatch Width:    3
# CHECK-NEXT: uOps Per Cycle:    1.50
# CHECK-NEXT: IPC:               1.00
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK:      Timeline view:
# CHECK-NEXT: Index     01234

# CHECK:      [0,0]     DeE .   ldr	x1, [x27], #254
# CHECK-NEXT: [0,1]     .DE .   add	x0, x27, #1
# CHECK-NEXT: [0,2]     . DeE   ldr	x2, [x1], #254
# CHECK-NEXT: [0,3]     .  DE   add	x0, x27, #1

# CHECK:      Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK:            [0]    [1]    [2]    [3]
# CHECK-NEXT: 0.     1     0.0    0.0    0.0       ldr	x1, [x27], #254
# CHECK-NEXT: 1.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT: 2.     1     0.0    0.0    0.0       ldr	x2, [x1], #254
# CHECK-NEXT: 3.     1     0.0    0.0    0.0       add	x0, x27, #1
# CHECK-NEXT:        1     0.0    0.0    0.0       <total>