llvm/llvm/test/CodeGen/X86/AMX/amx-fastconfig-phi.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
#
# This case test tile phi is nested accessed, but the its def block is
# not visited yet.
#
#                  BB.5
#           %6 = phi(%3, b%10) <-----
#            |               |      |
#            |               |      |
#          BB.6             BB.7    |
#            \              /       |
#              \          /         |
#                \      /           |
#                  BB.8 -------------
#            %10 = phi(%8, %9)
#
# #define STRIDE 32
# void foo(int cond, char *buf) {
#   __tile1024i a = {16, 64};
#   __tile1024i b = {16, 64};
#   __tile1024i c = {16, 64};
#
#   if (cond) {
#     __tile_zero(&c);
#   } else {
#     __tile_loadd(&c, buf, STRIDE);
#   }
#   __tile_zero(&a);
#   __tile_zero(&b);
#   for(int i = 0; i < 10; i++) {
#     __tile_dpbssd(&c, a, b);
#     if (cond) {
#       __tile_zero(&c);
#     } else {
#       __tile_loadd(&c, buf, STRIDE);
#     }
#   }
#   __tile_stored(buf, STRIDE, c);
# }
---
name:            foo
alignment:       16
tracksRegLiveness: true
registers:
  - { id: 0, class: gr8 }
  - { id: 1, class: tile }
  - { id: 2, class: tile }
  - { id: 3, class: tile }
  - { id: 4, class: tile }
  - { id: 5, class: tile }
  - { id: 6, class: gr32 }
  - { id: 7, class: tile }
  - { id: 8, class: tile }
  - { id: 9, class: tile }
  - { id: 10, class: tile }
  - { id: 11, class: gr32 }
  - { id: 12, class: gr32 }
  - { id: 13, class: gr32 }
  - { id: 14, class: gr64 }
  - { id: 15, class: gr64 }
  - { id: 16, class: gr8 }
  - { id: 17, class: gr16 }
  - { id: 18, class: gr16 }
  - { id: 19, class: gr64_nosp }
  - { id: 20, class: gr16 }
  - { id: 21, class: gr16 }
  - { id: 22, class: gr32 }
  - { id: 23, class: gr16 }
  - { id: 24, class: gr16 }
  - { id: 25, class: gr16 }
  - { id: 26, class: gr16 }
  - { id: 27, class: gr16 }
  - { id: 28, class: gr16 }
  - { id: 29, class: tile }
  - { id: 30, class: gr16 }
  - { id: 31, class: gr16 }
  - { id: 32, class: gr64_nosp }
  - { id: 33, class: gr16 }
  - { id: 34, class: gr16 }
  - { id: 35, class: gr32 }
  - { id: 36, class: gr64_nosp }
  - { id: 37, class: gr16 }
  - { id: 38, class: gr16 }
liveins:
  - { reg: '$edi', virtual-reg: '%12' }
  - { reg: '$rsi', virtual-reg: '%14' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo:
  amxProgModel: ManagedRA
body:             |
  ; CHECK-LABEL: name: foo
  ; CHECK: bb.0.entry:
  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $edi, $rsi
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_SET0_:%[0-9]+]]:vr128 = V_SET0
  ; CHECK-NEXT:   MOVUPSmr %stack.1, 1, $noreg, 0, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1, align 4)
  ; CHECK-NEXT:   MOVUPSmr %stack.1, 1, $noreg, 16, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 16, align 4)
  ; CHECK-NEXT:   MOVUPSmr %stack.1, 1, $noreg, 32, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 32, align 4)
  ; CHECK-NEXT:   MOVUPSmr %stack.1, 1, $noreg, 48, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 48, align 4)
  ; CHECK-NEXT:   MOV8mi %stack.1, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.1, align 4)
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY $rsi
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr32 = COPY $edi
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr32 = COPY killed [[COPY1]]
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gr64 = COPY killed [[COPY]]
  ; CHECK-NEXT:   CMP32ri8 [[COPY2]], 0, implicit-def $eflags
  ; CHECK-NEXT:   [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
  ; CHECK-NEXT:   TEST8ri [[SETCCr]], 1, implicit-def $eflags
  ; CHECK-NEXT:   JCC_1 %bb.2, 5, implicit $eflags
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
  ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.0, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.0)
  ; CHECK-NEXT:   JMP_1 %bb.3
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT:   [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.2, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2
  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2
  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], %bb.2
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5)
  ; CHECK-NEXT:   [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]]
  ; CHECK-NEXT:   [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4)
  ; CHECK-NEXT:   [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]]
  ; CHECK-NEXT:   [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3)
  ; CHECK-NEXT:   [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
  ; CHECK-NEXT:   JMP_1 %bb.5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT:   [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV %59, %60, %stack.9, 1, killed [[MOV64ri6]], 0, $noreg :: (load (s8192) from %stack.9)
  ; CHECK-NEXT:   PTILESTOREDV killed [[MOV16ri9]], killed [[MOV16ri8]], [[COPY3]], 1, killed [[MOV32ri64_1]], 0, $noreg, [[PTILELOADDV2]]
  ; CHECK-NEXT:   RET64
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   successors: %bb.7(0x40000000), %bb.6(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32r0_]], %bb.3, %35, %bb.8
  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8
  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8
  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8)
  ; CHECK-NEXT:   [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3)
  ; CHECK-NEXT:   [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV5:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri5]], [[MOV16ri4]], %stack.4, 1, killed [[MOV64ri10]], 0, $noreg :: (load (s8192) from %stack.4)
  ; CHECK-NEXT:   [[MOV64ri11:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], %stack.8, 1, killed [[MOV64ri11]], 0, $noreg :: (load (s8192) from %stack.8)
  ; CHECK-NEXT:   [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV killed [[MOV16ri11]], [[MOV16ri10]], [[MOV16ri10]], [[PTILELOADDV6]], [[PTILELOADDV5]], [[PTILELOADDV4]]
  ; CHECK-NEXT:   TEST8ri [[SETCCr]], 1, implicit-def $eflags
  ; CHECK-NEXT:   JCC_1 %bb.7, 5, implicit $eflags
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.6:
  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]]
  ; CHECK-NEXT:   [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.6, 1, killed [[MOV64ri12]], 0, $noreg, [[PTILEZEROV3]] :: (store (s8192) into %stack.6)
  ; CHECK-NEXT:   JMP_1 %bb.8
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.7:
  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT:   [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64
  ; CHECK-NEXT:   [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.7, 1, killed [[MOV64ri13]], 0, $noreg, [[PTILELOADDV7]] :: (store (s8192) into %stack.7)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.8:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.5(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7
  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7
  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7
  ; CHECK-NEXT:   PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
  ; CHECK-NEXT:   [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.9, 1, killed [[MOV64ri15]], 0, $noreg, [[PTILELOADDV8]] :: (store (s8192) into %stack.9)
  ; CHECK-NEXT:   [[ADD32ri8_:%[0-9]+]]:gr32 = ADD32ri8 [[PHI3]], 1, implicit-def $eflags
  ; CHECK-NEXT:   CMP32ri8 [[ADD32ri8_]], 10, implicit-def $eflags
  ; CHECK-NEXT:   JCC_1 %bb.4, 4, implicit $eflags
  ; CHECK-NEXT:   JMP_1 %bb.5
  bb.0.entry:
    liveins: $edi, $rsi

    %14:gr64 = COPY $rsi
    %12:gr32 = COPY $edi
    %13:gr32 = COPY killed %12
    %15:gr64 = COPY killed %14
    CMP32ri8 %13, 0, implicit-def $eflags
    %16:gr8 = SETCCr 4, implicit $eflags
    TEST8ri %16, 1, implicit-def $eflags
    JCC_1 %bb.2, 5, implicit $eflags

  bb.1:
    %17:gr16 = MOV16ri 64
    %18:gr16 = MOV16ri 16
    %1:tile = PTILEZEROV killed %18, killed %17
    JMP_1 %bb.3

  bb.2:
    %19:gr64_nosp = MOV32ri64 32
    %20:gr16 = MOV16ri 64
    %21:gr16 = MOV16ri 16
    %2:tile = PTILELOADDV killed %21, killed %20, %15, 1, killed %19, 0, $noreg

  bb.3:

    %3:tile = PHI %1, %bb.1, %2, %bb.2
    %25:gr16 = MOV16ri 64
    %26:gr16 = MOV16ri 16
    %4:tile = PTILEZEROV killed %26, killed %25
    %23:gr16 = MOV16ri 64
    %24:gr16 = MOV16ri 16
    %5:tile = PTILEZEROV killed %24, killed %23
    %22:gr32 = MOV32r0 implicit-def $eflags
    JMP_1 %bb.5

  bb.4:
    %36:gr64_nosp = MOV32ri64 32
    %37:gr16 = MOV16ri 64
    %38:gr16 = MOV16ri 16
    PTILESTOREDV killed %38, killed %37, %15, 1, killed %36, 0, $noreg, %10
    RET64

  bb.5:

    %6:gr32 = PHI %22, %bb.3, %35, %bb.8
    %7:tile = PHI %3, %bb.3, %10, %bb.8
    %27:gr16 = MOV16ri 64
    %28:gr16 = MOV16ri 16
    %29:tile = PTDPBSSDV killed %28, %27, %27, %7, %4, %5
    TEST8ri %16, 1, implicit-def $eflags
    JCC_1 %bb.7, 5, implicit $eflags

  bb.6:
    %30:gr16 = MOV16ri 64
    %31:gr16 = MOV16ri 16
    %8:tile = PTILEZEROV killed %31, killed %30
    JMP_1 %bb.8

  bb.7:
    %32:gr64_nosp = MOV32ri64 32
    %33:gr16 = MOV16ri 64
    %34:gr16 = MOV16ri 16
    %9:tile = PTILELOADDV killed %34, killed %33, %15, 1, killed %32, 0, $noreg

  bb.8:

    %10:tile = PHI %8, %bb.6, %9, %bb.7
    %35:gr32 = ADD32ri8 %6, 1, implicit-def $eflags
    CMP32ri8 %35, 10, implicit-def $eflags
    JCC_1 %bb.4, 4, implicit $eflags
    JMP_1 %bb.5

...