; llvm/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN:     -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN:    | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN:     -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:     -enable-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE

; Packed struct types wrapping a single double. %1 is the element type named
; by the getelementptr instructions in @acc_regalloc; %0 has no use in the
; portion of the file shown here (NOTE(review): likely a reduction leftover).
%0 = type <{ double }>
%1 = type <{ double }>

; Test function built around four <512 x i1> accumulator values. Each one is
; created with llvm.ppc.mma.assemble.acc, passed through a chain of
; llvm.ppc.mma.xvf64gerpp calls (partly inside an inner loop), disassembled,
; and one 16-byte piece of each result is stored.
;
; The function contains no `ret`: %bb95 always branches back to %bb9, and the
; inner loop %bb82 exits on an undef condition. Many operands are undef or
; null, so only the shape of the generated code is meaningful.
; NOTE(review): presumably reduced from a register-allocation problem with
; accumulator registers (going by the file name) - confirm against the
; originating commit.
;
;   %arg  - pointer from which an i32 is loaded in the entry block.
;   %arg1 - base of the addresses used by the first two stores in %bb95.
;   %arg2 - base of the addresses used by the %i17 and %i21 loads in %bb9.
;
; The expected assembly below is listed once per FileCheck prefix, one prefix
; for each llc invocation at the top of the file.
define void @acc_regalloc(ptr %arg, ptr %arg1, ptr %arg2) local_unnamed_addr {
; CHECK-LABEL: acc_regalloc:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    lwz r3, 0(r3)
; CHECK-NEXT:    lxv v4, 0(0)
; CHECK-NEXT:    xxlxor v0, v0, v0
; CHECK-NEXT:    xxlxor v1, v1, v1
; CHECK-NEXT:    xxlxor v2, v2, v2
; CHECK-NEXT:    li r6, 1
; CHECK-NEXT:    li r4, 16
; CHECK-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
; CHECK-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
; CHECK-NEXT:    extswsli r3, r3, 3
; CHECK-NEXT:    xvmaddadp v1, v4, v1
; CHECK-NEXT:    lxvdsx v5, 0, r3
; CHECK-NEXT:    xvmaddadp v0, v5, v0
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB0_1: # %bb9
; CHECK-NEXT:    #
; CHECK-NEXT:    addi r6, r6, 2
; CHECK-NEXT:    lxv vs0, 16(0)
; CHECK-NEXT:    lxv vs1, -64(r5)
; CHECK-NEXT:    xxlxor vs7, vs7, vs7
; CHECK-NEXT:    xxlor vs3, v0, v0
; CHECK-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-NEXT:    xxlxor vs12, vs12, vs12
; CHECK-NEXT:    mulld r6, r6, r3
; CHECK-NEXT:    xxlor vs10, v2, v2
; CHECK-NEXT:    xxlor vs4, v2, v2
; CHECK-NEXT:    xxlor vs8, vs10, vs10
; CHECK-NEXT:    xxlor vs10, v1, v1
; CHECK-NEXT:    xvmaddadp vs7, vs0, v5
; CHECK-NEXT:    xvmuldp vs6, vs0, v2
; CHECK-NEXT:    lxv vs0, -16(r5)
; CHECK-NEXT:    xvmaddadp vs3, vs1, v2
; CHECK-NEXT:    xvmaddadp vs2, vs1, vs2
; CHECK-NEXT:    lxvdsx v6, r6, r4
; CHECK-NEXT:    li r6, 0
; CHECK-NEXT:    xvmaddadp vs7, v2, v2
; CHECK-NEXT:    xvmaddadp vs6, v2, v2
; CHECK-NEXT:    xvmaddadp vs12, vs0, vs12
; CHECK-NEXT:    xvmuldp v3, vs1, v6
; CHECK-NEXT:    xvmuldp vs11, v4, v6
; CHECK-NEXT:    xvmuldp vs13, vs0, v6
; CHECK-NEXT:    xvmuldp vs5, v6, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs14, vs12, vs12
; CHECK-NEXT:    xxlor vs12, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs9, vs11, vs11
; CHECK-NEXT:    xxlor vs15, vs13, vs13
; CHECK-NEXT:    xxmtacc acc1
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xxmtacc acc2
; CHECK-NEXT:    xxmtacc acc3
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT:    xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc2
; CHECK-NEXT:    xxmfacc acc3
; CHECK-NEXT:    stxv vs1, 0(r3)
; CHECK-NEXT:    stxv vs9, 32(r3)
; CHECK-NEXT:    stxv vs4, 16(0)
; CHECK-NEXT:    stxv vs12, 48(0)
; CHECK-NEXT:    b .LBB0_1
;
; TRACKLIVE-LABEL: acc_regalloc:
; TRACKLIVE:       # %bb.0: # %bb
; TRACKLIVE-NEXT:    lwz r3, 0(r3)
; TRACKLIVE-NEXT:    lxv v4, 0(0)
; TRACKLIVE-NEXT:    xxlxor v0, v0, v0
; TRACKLIVE-NEXT:    xxlxor v1, v1, v1
; TRACKLIVE-NEXT:    xxlxor v2, v2, v2
; TRACKLIVE-NEXT:    li r6, 1
; TRACKLIVE-NEXT:    li r4, 16
; TRACKLIVE-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT:    extswsli r3, r3, 3
; TRACKLIVE-NEXT:    xvmaddadp v1, v4, v1
; TRACKLIVE-NEXT:    lxvdsx v5, 0, r3
; TRACKLIVE-NEXT:    xvmaddadp v0, v5, v0
; TRACKLIVE-NEXT:    .p2align 4
; TRACKLIVE-NEXT:  .LBB0_1: # %bb9
; TRACKLIVE-NEXT:    #
; TRACKLIVE-NEXT:    addi r6, r6, 2
; TRACKLIVE-NEXT:    lxv vs0, 16(0)
; TRACKLIVE-NEXT:    lxv vs1, -64(r5)
; TRACKLIVE-NEXT:    xxlxor vs7, vs7, vs7
; TRACKLIVE-NEXT:    xxlor vs3, v0, v0
; TRACKLIVE-NEXT:    xxlxor vs2, vs2, vs2
; TRACKLIVE-NEXT:    xxlxor vs12, vs12, vs12
; TRACKLIVE-NEXT:    mulld r6, r6, r3
; TRACKLIVE-NEXT:    xxlor vs10, v2, v2
; TRACKLIVE-NEXT:    xxlor vs4, v2, v2
; TRACKLIVE-NEXT:    xxlor vs8, vs10, vs10
; TRACKLIVE-NEXT:    xxlor vs10, v1, v1
; TRACKLIVE-NEXT:    xvmaddadp vs7, vs0, v5
; TRACKLIVE-NEXT:    xvmuldp vs6, vs0, v2
; TRACKLIVE-NEXT:    lxv vs0, -16(r5)
; TRACKLIVE-NEXT:    xvmaddadp vs3, vs1, v2
; TRACKLIVE-NEXT:    xvmaddadp vs2, vs1, vs2
; TRACKLIVE-NEXT:    lxvdsx v6, r6, r4
; TRACKLIVE-NEXT:    li r6, 0
; TRACKLIVE-NEXT:    xvmaddadp vs7, v2, v2
; TRACKLIVE-NEXT:    xvmaddadp vs6, v2, v2
; TRACKLIVE-NEXT:    xvmaddadp vs12, vs0, vs12
; TRACKLIVE-NEXT:    xvmuldp v3, vs1, v6
; TRACKLIVE-NEXT:    xvmuldp vs11, v4, v6
; TRACKLIVE-NEXT:    xvmuldp vs13, vs0, v6
; TRACKLIVE-NEXT:    xvmuldp vs5, v6, v2
; TRACKLIVE-NEXT:    xxlor vs0, v2, v2
; TRACKLIVE-NEXT:    xxlor vs14, vs12, vs12
; TRACKLIVE-NEXT:    xxlor vs12, v2, v2
; TRACKLIVE-NEXT:    xxlor vs1, v3, v3
; TRACKLIVE-NEXT:    xxlor vs9, vs11, vs11
; TRACKLIVE-NEXT:    xxlor vs15, vs13, vs13
; TRACKLIVE-NEXT:    xxmtacc acc1
; TRACKLIVE-NEXT:    xxmtacc acc0
; TRACKLIVE-NEXT:    xxmtacc acc2
; TRACKLIVE-NEXT:    xxmtacc acc3
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT:    xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT:    xxmfacc acc0
; TRACKLIVE-NEXT:    xxmfacc acc1
; TRACKLIVE-NEXT:    xxmfacc acc2
; TRACKLIVE-NEXT:    xxmfacc acc3
; TRACKLIVE-NEXT:    stxv vs1, 0(r3)
; TRACKLIVE-NEXT:    stxv vs9, 32(r3)
; TRACKLIVE-NEXT:    stxv vs4, 16(0)
; TRACKLIVE-NEXT:    stxv vs12, 48(0)
; TRACKLIVE-NEXT:    b .LBB0_1
bb:
  ; Load an i32 through %arg, sign-extend it and scale it by 8 (shl 3). The
  ; result %i4 is the multiplier used for the address computation in %bb9.
  %i = load i32, ptr %arg, align 4
  %i3 = sext i32 %i to i64
  %i4 = shl nsw i64 %i3, 3
  ; Address bases derived from %arg1 and %arg2 with undef or constant offsets.
  %i6 = getelementptr i8, ptr %arg1, i64 undef
  %i7 = getelementptr [0 x %1], ptr %arg2, i64 0, i64 -8
  %i8 = getelementptr i8, ptr %i6, i64 undef
  br label %bb9

bb9:                                              ; preds = %bb95, %bb
  ; %i10 is 1 when entered from %bb and 0 when re-entered from %bb95.
  %i10 = phi i64 [ 1, %bb ], [ 0, %bb95 ]
  ; Four <2 x double> loads: %i13 and %i18 use null-based addresses, %i17 and
  ; %i21 use addresses derived from %i7 (and therefore from %arg2).
  %i11 = getelementptr %1, ptr null, i64 2
  %i13 = load <2 x double>, ptr %i11, align 1
  %i14 = add nuw nsw i64 %i10, 2
  %i15 = getelementptr inbounds %1, ptr %i7, i64 undef
  %i17 = load <2 x double>, ptr %i15, align 1
  %i18 = load <2 x double>, ptr null, align 1
  %i19 = getelementptr %1, ptr %i15, i64 6
  %i21 = load <2 x double>, ptr %i19, align 1
  ; Splat: an i64 loaded from undef is inserted into lane 0, reinterpreted as
  ; <2 x double>, and lane 0 is broadcast to both lanes (%i25).
  %i22 = load i64, ptr undef, align 8
  %i23 = insertelement <2 x i64> poison, i64 %i22, i32 0
  %i24 = bitcast <2 x i64> %i23 to <2 x double>
  %i25 = shufflevector <2 x double> %i24, <2 x double> undef, <2 x i32> zeroinitializer
  ; Second splat (%i34): the i64 is loaded from null + (%i10 + 2) * %i4 + 16.
  %i26 = mul i64 %i14, %i4
  %i27 = getelementptr i8, ptr null, i64 %i26
  %i29 = getelementptr i8, ptr %i27, i64 16
  %i31 = load i64, ptr %i29, align 8
  %i32 = insertelement <2 x i64> poison, i64 %i31, i32 0
  %i33 = bitcast <2 x i64> %i32 to <2 x double>
  %i34 = shufflevector <2 x double> %i33, <2 x double> undef, <2 x i32> zeroinitializer
  ; Vector FMAs and multiplies; their results are the inputs to the four
  ; accumulator assemblies below.
  %i35 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %i25, <2 x double> zeroinitializer)
  %i36 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i13, <2 x double> %i25, <2 x double> zeroinitializer)
  %i37 = fmul contract <2 x double> %i13, zeroinitializer
  %i38 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> %i35)
  %i39 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i36)
  %i40 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
  %i41 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i37)
  %i42 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i18, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
  %i43 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i21, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
  %i44 = fmul contract <2 x double> %i17, %i34
  %i45 = fmul contract <2 x double> zeroinitializer, %i34
  %i46 = fmul contract <2 x double> %i18, %i34
  %i47 = fmul contract <2 x double> %i21, %i34
  ; Build the four accumulators %i51, %i55, %i58 and %i61. The first operand
  ; of every assemble call is zeroinitializer; %i58 and %i61 reuse their second
  ; operand as the fourth.
  %i48 = bitcast <2 x double> %i44 to <16 x i8>
  %i49 = bitcast <2 x double> %i40 to <16 x i8>
  %i50 = bitcast <2 x double> %i38 to <16 x i8>
  %i51 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i48, <16 x i8> %i49, <16 x i8> %i50)
  %i52 = bitcast <2 x double> %i45 to <16 x i8>
  %i53 = bitcast <2 x double> %i41 to <16 x i8>
  %i54 = bitcast <2 x double> %i39 to <16 x i8>
  %i55 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i52, <16 x i8> %i53, <16 x i8> %i54)
  %i56 = bitcast <2 x double> %i46 to <16 x i8>
  %i57 = bitcast <2 x double> %i42 to <16 x i8>
  %i58 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i56, <16 x i8> %i57, <16 x i8> %i56)
  %i59 = bitcast <2 x double> %i47 to <16 x i8>
  %i60 = bitcast <2 x double> %i43 to <16 x i8>
  %i61 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i59, <16 x i8> %i60, <16 x i8> %i59)
  ; Five rounds of xvf64gerpp over the four accumulators, interleaved so that
  ; each call consumes the previous result of its own chain. All four chains
  ; are therefore live at the same time. The non-accumulator operands are undef.
  %i62 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i51, <256 x i1> undef, <16 x i8> undef)
  %i63 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i55, <256 x i1> undef, <16 x i8> undef)
  %i64 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i58, <256 x i1> undef, <16 x i8> undef)
  %i65 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i61, <256 x i1> undef, <16 x i8> undef)
  %i66 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i62, <256 x i1> undef, <16 x i8> undef)
  %i67 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i63, <256 x i1> undef, <16 x i8> undef)
  %i68 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i64, <256 x i1> undef, <16 x i8> undef)
  %i69 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i65, <256 x i1> undef, <16 x i8> undef)
  %i70 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i66, <256 x i1> undef, <16 x i8> undef)
  %i71 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i67, <256 x i1> undef, <16 x i8> undef)
  %i72 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i68, <256 x i1> undef, <16 x i8> undef)
  %i73 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i69, <256 x i1> undef, <16 x i8> undef)
  %i74 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i70, <256 x i1> undef, <16 x i8> undef)
  %i75 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i71, <256 x i1> undef, <16 x i8> undef)
  %i76 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i72, <256 x i1> undef, <16 x i8> undef)
  %i77 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i73, <256 x i1> undef, <16 x i8> undef)
  %i78 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i74, <256 x i1> undef, <16 x i8> undef)
  %i79 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i75, <256 x i1> undef, <16 x i8> undef)
  %i80 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i76, <256 x i1> undef, <16 x i8> undef)
  %i81 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i77, <256 x i1> undef, <16 x i8> undef)
  br label %bb82

bb82:                                             ; preds = %bb82, %bb9
  ; Inner loop. The four accumulator chains are carried through the phis
  ; below, and each chain goes through two more xvf64gerpp calls per iteration.
  %i83 = phi <512 x i1> [ %i94, %bb82 ], [ %i81, %bb9 ]
  %i84 = phi <512 x i1> [ %i93, %bb82 ], [ %i80, %bb9 ]
  %i85 = phi <512 x i1> [ %i92, %bb82 ], [ %i79, %bb9 ]
  %i86 = phi <512 x i1> [ %i91, %bb82 ], [ %i78, %bb9 ]
  %i87 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i86, <256 x i1> undef, <16 x i8> undef)
  %i88 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i85, <256 x i1> undef, <16 x i8> undef)
  %i89 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i84, <256 x i1> undef, <16 x i8> undef)
  %i90 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i83, <256 x i1> undef, <16 x i8> undef)
  %i91 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i87, <256 x i1> undef, <16 x i8> undef)
  %i92 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i88, <256 x i1> undef, <16 x i8> undef)
  %i93 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i89, <256 x i1> undef, <16 x i8> undef)
  %i94 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i90, <256 x i1> undef, <16 x i8> undef)
  ; The loop-exit condition is undef.
  br i1 undef, label %bb95, label %bb82

bb95:                                             ; preds = %bb82
  ; Disassemble every accumulator and keep a single 16-byte piece of each:
  ; element 2 of %i91 and %i93, element 3 of %i92 and %i94.
  %i96 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i91)
  %i97 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i96, 2
  %i98 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i92)
  %i99 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i98, 3
  %i100 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i93)
  %i101 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i100, 2
  %i102 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i94)
  %i103 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i102, 3
  ; %i97 and %i101 are stored at %i104 and %i104 + 32 (derived from %arg1);
  ; %i99 and %i103 are stored at offsets 16 and 48 from a null base.
  %i104 = getelementptr inbounds i8, ptr %i8, i64 undef
  store <16 x i8> %i97, ptr %i104, align 1
  %i106 = getelementptr i8, ptr %i104, i64 32
  store <16 x i8> %i101, ptr %i106, align 1
  %i108 = getelementptr i8, ptr null, i64 16
  store <16 x i8> %i99, ptr %i108, align 1
  %i110 = getelementptr i8, ptr null, i64 48
  store <16 x i8> %i103, ptr %i110, align 1
  ; Unconditionally restart the outer loop; the function never returns.
  br label %bb9
}

; Declarations of the intrinsics called by @acc_regalloc.
; Fused multiply-add on <2 x double> vectors.
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
; Builds a <512 x i1> accumulator value from four <16 x i8> vectors.
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
; Takes an accumulator plus <256 x i1> and <16 x i8> operands and returns the
; updated accumulator.
declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
; Splits a <512 x i1> accumulator value into a struct of four <16 x i8> vectors.
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)