llvm/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,LE,LE-64BIT
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu   | FileCheck %s --check-prefixes=ALL,BE
; RUN: llc < %s -mtriple=ppc32--                       | FileCheck %s --check-prefixes=ALL,LE,LE-32BIT

define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: lshr_4bytes:
; ALL:       # %bb.0:
; ALL-NEXT:    lwz 4, 0(4)
; ALL-NEXT:    lwz 3, 0(3)
; ALL-NEXT:    slwi 4, 4, 3
; ALL-NEXT:    srw 3, 3, 4
; ALL-NEXT:    stw 3, 0(5)
; ALL-NEXT:    blr
  %src = load i32, ptr %src.ptr, align 1
  %byteOff = load i32, ptr %byteOff.ptr, align 1
  %bitOff = shl i32 %byteOff, 3
  %res = lshr i32 %src, %bitOff
  store i32 %res, ptr %dst, align 1
  ret void
}
define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: shl_4bytes:
; ALL:       # %bb.0:
; ALL-NEXT:    lwz 4, 0(4)
; ALL-NEXT:    lwz 3, 0(3)
; ALL-NEXT:    slwi 4, 4, 3
; ALL-NEXT:    slw 3, 3, 4
; ALL-NEXT:    stw 3, 0(5)
; ALL-NEXT:    blr
  %src = load i32, ptr %src.ptr, align 1
  %byteOff = load i32, ptr %byteOff.ptr, align 1
  %bitOff = shl i32 %byteOff, 3
  %res = shl i32 %src, %bitOff
  store i32 %res, ptr %dst, align 1
  ret void
}
define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: ashr_4bytes:
; ALL:       # %bb.0:
; ALL-NEXT:    lwz 4, 0(4)
; ALL-NEXT:    lwz 3, 0(3)
; ALL-NEXT:    slwi 4, 4, 3
; ALL-NEXT:    sraw 3, 3, 4
; ALL-NEXT:    stw 3, 0(5)
; ALL-NEXT:    blr
  %src = load i32, ptr %src.ptr, align 1
  %byteOff = load i32, ptr %byteOff.ptr, align 1
  %bitOff = shl i32 %byteOff, 3
  %res = ashr i32 %src, %bitOff
  store i32 %res, ptr %dst, align 1
  ret void
}

define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: lshr_8bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 3
; LE-64BIT-NEXT:    srd 3, 3, 4
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: lshr_8bytes:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 4(4)
; BE-NEXT:    ld 3, 0(3)
; BE-NEXT:    slwi 4, 4, 3
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: lshr_8bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    lwz 4, 4(4)
; LE-32BIT-NEXT:    lwz 6, 4(3)
; LE-32BIT-NEXT:    lwz 3, 0(3)
; LE-32BIT-NEXT:    slwi 4, 4, 3
; LE-32BIT-NEXT:    subfic 7, 4, 32
; LE-32BIT-NEXT:    srw 6, 6, 4
; LE-32BIT-NEXT:    addi 8, 4, -32
; LE-32BIT-NEXT:    slw 7, 3, 7
; LE-32BIT-NEXT:    srw 4, 3, 4
; LE-32BIT-NEXT:    srw 3, 3, 8
; LE-32BIT-NEXT:    or 6, 6, 7
; LE-32BIT-NEXT:    or 3, 6, 3
; LE-32BIT-NEXT:    stw 4, 0(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    blr
  %src = load i64, ptr %src.ptr, align 1
  %byteOff = load i64, ptr %byteOff.ptr, align 1
  %bitOff = shl i64 %byteOff, 3
  %res = lshr i64 %src, %bitOff
  store i64 %res, ptr %dst, align 1
  ret void
}
define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: shl_8bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 3
; LE-64BIT-NEXT:    sld 3, 3, 4
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: shl_8bytes:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 4(4)
; BE-NEXT:    ld 3, 0(3)
; BE-NEXT:    slwi 4, 4, 3
; BE-NEXT:    sld 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: shl_8bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    lwz 4, 4(4)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 3, 4(3)
; LE-32BIT-NEXT:    slwi 4, 4, 3
; LE-32BIT-NEXT:    subfic 7, 4, 32
; LE-32BIT-NEXT:    slw 6, 6, 4
; LE-32BIT-NEXT:    addi 8, 4, -32
; LE-32BIT-NEXT:    srw 7, 3, 7
; LE-32BIT-NEXT:    slw 4, 3, 4
; LE-32BIT-NEXT:    slw 3, 3, 8
; LE-32BIT-NEXT:    or 6, 6, 7
; LE-32BIT-NEXT:    or 3, 6, 3
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    stw 3, 0(5)
; LE-32BIT-NEXT:    blr
  %src = load i64, ptr %src.ptr, align 1
  %byteOff = load i64, ptr %byteOff.ptr, align 1
  %bitOff = shl i64 %byteOff, 3
  %res = shl i64 %src, %bitOff
  store i64 %res, ptr %dst, align 1
  ret void
}
define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_8bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 3
; LE-64BIT-NEXT:    srad 3, 3, 4
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: ashr_8bytes:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 4(4)
; BE-NEXT:    ld 3, 0(3)
; BE-NEXT:    slwi 4, 4, 3
; BE-NEXT:    srad 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: ashr_8bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    lwz 4, 4(4)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    slwi 4, 4, 3
; LE-32BIT-NEXT:    addi 7, 4, -32
; LE-32BIT-NEXT:    cmpwi 7, 0
; LE-32BIT-NEXT:    ble 0, .LBB5_2
; LE-32BIT-NEXT:  # %bb.1:
; LE-32BIT-NEXT:    sraw 3, 6, 7
; LE-32BIT-NEXT:    b .LBB5_3
; LE-32BIT-NEXT:  .LBB5_2:
; LE-32BIT-NEXT:    lwz 3, 4(3)
; LE-32BIT-NEXT:    subfic 7, 4, 32
; LE-32BIT-NEXT:    slw 7, 6, 7
; LE-32BIT-NEXT:    srw 3, 3, 4
; LE-32BIT-NEXT:    or 3, 3, 7
; LE-32BIT-NEXT:  .LBB5_3:
; LE-32BIT-NEXT:    sraw 4, 6, 4
; LE-32BIT-NEXT:    stw 4, 0(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    blr
  %src = load i64, ptr %src.ptr, align 1
  %byteOff = load i64, ptr %byteOff.ptr, align 1
  %bitOff = shl i64 %byteOff, 3
  %res = ashr i64 %src, %bitOff
  store i64 %res, ptr %dst, align 1
  ret void
}

define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: lshr_16bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 6, 8(3)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 3
; LE-64BIT-NEXT:    subfic 7, 4, 64
; LE-64BIT-NEXT:    srd 3, 3, 4
; LE-64BIT-NEXT:    sld 7, 6, 7
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    addi 7, 4, -64
; LE-64BIT-NEXT:    srd 4, 6, 4
; LE-64BIT-NEXT:    srd 7, 6, 7
; LE-64BIT-NEXT:    std 4, 8(5)
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: lshr_16bytes:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 12(4)
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    ld 3, 8(3)
; BE-NEXT:    slwi 4, 4, 3
; BE-NEXT:    subfic 7, 4, 64
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    sld 7, 6, 7
; BE-NEXT:    addi 8, 4, -64
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    srd 7, 6, 8
; BE-NEXT:    srd 4, 6, 4
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    std 4, 0(5)
; BE-NEXT:    std 3, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: lshr_16bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -48(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    lwz 4, 12(4)
; LE-32BIT-NEXT:    stw 6, 28(1)
; LE-32BIT-NEXT:    stw 6, 24(1)
; LE-32BIT-NEXT:    stw 6, 20(1)
; LE-32BIT-NEXT:    stw 6, 16(1)
; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 28, 29
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    addi 3, 1, 32
; LE-32BIT-NEXT:    stw 9, 40(1)
; LE-32BIT-NEXT:    sub 3, 3, 6
; LE-32BIT-NEXT:    stw 8, 36(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 7, 32(1)
; LE-32BIT-NEXT:    subfic 9, 4, 32
; LE-32BIT-NEXT:    lwz 6, 4(3)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    lwz 8, 12(3)
; LE-32BIT-NEXT:    srw 10, 6, 4
; LE-32BIT-NEXT:    lwz 3, 8(3)
; LE-32BIT-NEXT:    slw 11, 7, 9
; LE-32BIT-NEXT:    slw 6, 6, 9
; LE-32BIT-NEXT:    srw 8, 8, 4
; LE-32BIT-NEXT:    slw 9, 3, 9
; LE-32BIT-NEXT:    srw 3, 3, 4
; LE-32BIT-NEXT:    or 3, 6, 3
; LE-32BIT-NEXT:    stw 3, 8(5)
; LE-32BIT-NEXT:    or 3, 9, 8
; LE-32BIT-NEXT:    srw 4, 7, 4
; LE-32BIT-NEXT:    stw 3, 12(5)
; LE-32BIT-NEXT:    or 3, 11, 10
; LE-32BIT-NEXT:    stw 4, 0(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 48
; LE-32BIT-NEXT:    blr
  %src = load i128, ptr %src.ptr, align 1
  %byteOff = load i128, ptr %byteOff.ptr, align 1
  %bitOff = shl i128 %byteOff, 3
  %res = lshr i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}

define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: lshr_16bytes_wordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 6, 8(3)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 5
; LE-64BIT-NEXT:    subfic 7, 4, 64
; LE-64BIT-NEXT:    srd 3, 3, 4
; LE-64BIT-NEXT:    sld 7, 6, 7
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    addi 7, 4, -64
; LE-64BIT-NEXT:    srd 4, 6, 4
; LE-64BIT-NEXT:    srd 7, 6, 7
; LE-64BIT-NEXT:    std 4, 8(5)
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: lshr_16bytes_wordOff:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 12(4)
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    ld 3, 8(3)
; BE-NEXT:    slwi 4, 4, 5
; BE-NEXT:    subfic 7, 4, 64
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    sld 7, 6, 7
; BE-NEXT:    addi 8, 4, -64
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    srd 7, 6, 8
; BE-NEXT:    srd 4, 6, 4
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    std 4, 0(5)
; BE-NEXT:    std 3, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: lshr_16bytes_wordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -48(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    lwz 4, 12(4)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    addi 3, 1, 32
; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
; LE-32BIT-NEXT:    stw 6, 28(1)
; LE-32BIT-NEXT:    sub 3, 3, 4
; LE-32BIT-NEXT:    stw 6, 24(1)
; LE-32BIT-NEXT:    stw 6, 20(1)
; LE-32BIT-NEXT:    stw 6, 16(1)
; LE-32BIT-NEXT:    stw 9, 40(1)
; LE-32BIT-NEXT:    stw 8, 36(1)
; LE-32BIT-NEXT:    stw 7, 32(1)
; LE-32BIT-NEXT:    lwz 4, 4(3)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 7, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    stw 7, 8(5)
; LE-32BIT-NEXT:    stw 3, 12(5)
; LE-32BIT-NEXT:    stw 6, 0(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 48
; LE-32BIT-NEXT:    blr
  %src = load i128, ptr %src.ptr, align 1
  %wordOff = load i128, ptr %wordOff.ptr, align 1
  %bitOff = shl i128 %wordOff, 5
  %res = lshr i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}

define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: shl_16bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 6, 0(3)
; LE-64BIT-NEXT:    ld 3, 8(3)
; LE-64BIT-NEXT:    slwi 4, 4, 3
; LE-64BIT-NEXT:    subfic 7, 4, 64
; LE-64BIT-NEXT:    sld 3, 3, 4
; LE-64BIT-NEXT:    srd 7, 6, 7
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    addi 7, 4, -64
; LE-64BIT-NEXT:    sld 4, 6, 4
; LE-64BIT-NEXT:    sld 7, 6, 7
; LE-64BIT-NEXT:    std 4, 0(5)
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    std 3, 8(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: shl_16bytes:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 12(4)
; BE-NEXT:    ld 6, 8(3)
; BE-NEXT:    ld 3, 0(3)
; BE-NEXT:    slwi 4, 4, 3
; BE-NEXT:    subfic 7, 4, 64
; BE-NEXT:    sld 3, 3, 4
; BE-NEXT:    srd 7, 6, 7
; BE-NEXT:    addi 8, 4, -64
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    sld 7, 6, 8
; BE-NEXT:    sld 4, 6, 4
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    std 4, 8(5)
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: shl_16bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -48(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    lwz 4, 12(4)
; LE-32BIT-NEXT:    stw 6, 44(1)
; LE-32BIT-NEXT:    stw 6, 40(1)
; LE-32BIT-NEXT:    stw 6, 36(1)
; LE-32BIT-NEXT:    stw 6, 32(1)
; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 28, 29
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    addi 3, 1, 16
; LE-32BIT-NEXT:    stw 9, 24(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 8, 20(1)
; LE-32BIT-NEXT:    subfic 8, 4, 32
; LE-32BIT-NEXT:    stw 7, 16(1)
; LE-32BIT-NEXT:    lwzux 3, 6, 3
; LE-32BIT-NEXT:    lwz 9, 4(6)
; LE-32BIT-NEXT:    slw 3, 3, 4
; LE-32BIT-NEXT:    lwz 7, 8(6)
; LE-32BIT-NEXT:    lwz 6, 12(6)
; LE-32BIT-NEXT:    slw 11, 9, 4
; LE-32BIT-NEXT:    srw 9, 9, 8
; LE-32BIT-NEXT:    srw 10, 7, 8
; LE-32BIT-NEXT:    srw 8, 6, 8
; LE-32BIT-NEXT:    slw 7, 7, 4
; LE-32BIT-NEXT:    slw 4, 6, 4
; LE-32BIT-NEXT:    or 3, 3, 9
; LE-32BIT-NEXT:    stw 4, 12(5)
; LE-32BIT-NEXT:    or 4, 7, 8
; LE-32BIT-NEXT:    stw 3, 0(5)
; LE-32BIT-NEXT:    or 3, 11, 10
; LE-32BIT-NEXT:    stw 4, 8(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 48
; LE-32BIT-NEXT:    blr
  %src = load i128, ptr %src.ptr, align 1
  %byteOff = load i128, ptr %byteOff.ptr, align 1
  %bitOff = shl i128 %byteOff, 3
  %res = shl i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}

define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: shl_16bytes_wordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 6, 0(3)
; LE-64BIT-NEXT:    ld 3, 8(3)
; LE-64BIT-NEXT:    slwi 4, 4, 5
; LE-64BIT-NEXT:    subfic 7, 4, 64
; LE-64BIT-NEXT:    sld 3, 3, 4
; LE-64BIT-NEXT:    srd 7, 6, 7
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    addi 7, 4, -64
; LE-64BIT-NEXT:    sld 4, 6, 4
; LE-64BIT-NEXT:    sld 7, 6, 7
; LE-64BIT-NEXT:    std 4, 0(5)
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    std 3, 8(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: shl_16bytes_wordOff:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 12(4)
; BE-NEXT:    ld 6, 8(3)
; BE-NEXT:    ld 3, 0(3)
; BE-NEXT:    slwi 4, 4, 5
; BE-NEXT:    subfic 7, 4, 64
; BE-NEXT:    sld 3, 3, 4
; BE-NEXT:    srd 7, 6, 7
; BE-NEXT:    addi 8, 4, -64
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    sld 7, 6, 8
; BE-NEXT:    sld 4, 6, 4
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:    std 4, 8(5)
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: shl_16bytes_wordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -48(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    lwz 4, 12(4)
; LE-32BIT-NEXT:    stw 6, 44(1)
; LE-32BIT-NEXT:    stw 6, 40(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
; LE-32BIT-NEXT:    stw 6, 36(1)
; LE-32BIT-NEXT:    stw 6, 32(1)
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    addi 3, 1, 16
; LE-32BIT-NEXT:    stw 9, 24(1)
; LE-32BIT-NEXT:    stw 8, 20(1)
; LE-32BIT-NEXT:    stw 7, 16(1)
; LE-32BIT-NEXT:    lwzux 3, 4, 3
; LE-32BIT-NEXT:    lwz 6, 4(4)
; LE-32BIT-NEXT:    lwz 7, 12(4)
; LE-32BIT-NEXT:    lwz 4, 8(4)
; LE-32BIT-NEXT:    stw 3, 0(5)
; LE-32BIT-NEXT:    stw 4, 8(5)
; LE-32BIT-NEXT:    stw 7, 12(5)
; LE-32BIT-NEXT:    stw 6, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 48
; LE-32BIT-NEXT:    blr
  %src = load i128, ptr %src.ptr, align 1
  %wordOff = load i128, ptr %wordOff.ptr, align 1
  %bitOff = shl i128 %wordOff, 5
  %res = shl i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}

define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_16bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 6, 8(3)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 3
; LE-64BIT-NEXT:    subfic 7, 4, 64
; LE-64BIT-NEXT:    srd 3, 3, 4
; LE-64BIT-NEXT:    sld 7, 6, 7
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    addi 7, 4, -64
; LE-64BIT-NEXT:    srad 4, 6, 4
; LE-64BIT-NEXT:    cmpwi 7, 1
; LE-64BIT-NEXT:    srad 8, 6, 7
; LE-64BIT-NEXT:    std 4, 8(5)
; LE-64BIT-NEXT:    isellt 3, 3, 8
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: ashr_16bytes:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 12(4)
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    slwi 4, 4, 3
; BE-NEXT:    addi 7, 4, -64
; BE-NEXT:    cmpwi 7, 1
; BE-NEXT:    blt 0, .LBB10_2
; BE-NEXT:  # %bb.1:
; BE-NEXT:    srad 3, 6, 7
; BE-NEXT:    b .LBB10_3
; BE-NEXT:  .LBB10_2:
; BE-NEXT:    ld 3, 8(3)
; BE-NEXT:    subfic 7, 4, 64
; BE-NEXT:    sld 7, 6, 7
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:  .LBB10_3:
; BE-NEXT:    srad 4, 6, 4
; BE-NEXT:    std 3, 8(5)
; BE-NEXT:    std 4, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: ashr_16bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -48(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    addi 6, 1, 32
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    lwz 4, 12(4)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    srawi 3, 7, 31
; LE-32BIT-NEXT:    stw 7, 32(1)
; LE-32BIT-NEXT:    rlwinm 7, 4, 0, 28, 29
; LE-32BIT-NEXT:    stw 9, 40(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 8, 36(1)
; LE-32BIT-NEXT:    subfic 9, 4, 32
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    stw 3, 24(1)
; LE-32BIT-NEXT:    stw 3, 20(1)
; LE-32BIT-NEXT:    stw 3, 16(1)
; LE-32BIT-NEXT:    sub 3, 6, 7
; LE-32BIT-NEXT:    lwz 6, 4(3)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    lwz 8, 12(3)
; LE-32BIT-NEXT:    srw 10, 6, 4
; LE-32BIT-NEXT:    lwz 3, 8(3)
; LE-32BIT-NEXT:    slw 11, 7, 9
; LE-32BIT-NEXT:    slw 6, 6, 9
; LE-32BIT-NEXT:    srw 8, 8, 4
; LE-32BIT-NEXT:    slw 9, 3, 9
; LE-32BIT-NEXT:    srw 3, 3, 4
; LE-32BIT-NEXT:    or 3, 6, 3
; LE-32BIT-NEXT:    stw 3, 8(5)
; LE-32BIT-NEXT:    or 3, 9, 8
; LE-32BIT-NEXT:    sraw 4, 7, 4
; LE-32BIT-NEXT:    stw 3, 12(5)
; LE-32BIT-NEXT:    or 3, 11, 10
; LE-32BIT-NEXT:    stw 4, 0(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 48
; LE-32BIT-NEXT:    blr
  %src = load i128, ptr %src.ptr, align 1
  %byteOff = load i128, ptr %byteOff.ptr, align 1
  %bitOff = shl i128 %byteOff, 3
  %res = ashr i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}

define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_16bytes_wordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    ld 6, 8(3)
; LE-64BIT-NEXT:    ld 3, 0(3)
; LE-64BIT-NEXT:    slwi 4, 4, 5
; LE-64BIT-NEXT:    subfic 7, 4, 64
; LE-64BIT-NEXT:    srd 3, 3, 4
; LE-64BIT-NEXT:    sld 7, 6, 7
; LE-64BIT-NEXT:    or 3, 3, 7
; LE-64BIT-NEXT:    addi 7, 4, -64
; LE-64BIT-NEXT:    srad 4, 6, 4
; LE-64BIT-NEXT:    cmpwi 7, 1
; LE-64BIT-NEXT:    srad 8, 6, 7
; LE-64BIT-NEXT:    std 4, 8(5)
; LE-64BIT-NEXT:    isellt 3, 3, 8
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: ashr_16bytes_wordOff:
; BE:       # %bb.0:
; BE-NEXT:    lwz 4, 12(4)
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    slwi 4, 4, 5
; BE-NEXT:    addi 7, 4, -64
; BE-NEXT:    cmpwi 7, 1
; BE-NEXT:    blt 0, .LBB11_2
; BE-NEXT:  # %bb.1:
; BE-NEXT:    srad 3, 6, 7
; BE-NEXT:    b .LBB11_3
; BE-NEXT:  .LBB11_2:
; BE-NEXT:    ld 3, 8(3)
; BE-NEXT:    subfic 7, 4, 64
; BE-NEXT:    sld 7, 6, 7
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    or 3, 3, 7
; BE-NEXT:  .LBB11_3:
; BE-NEXT:    srad 4, 6, 4
; BE-NEXT:    std 3, 8(5)
; BE-NEXT:    std 4, 0(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: ashr_16bytes_wordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -48(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    addi 6, 1, 32
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    lwz 4, 12(4)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    srawi 3, 7, 31
; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
; LE-32BIT-NEXT:    stw 9, 40(1)
; LE-32BIT-NEXT:    stw 8, 36(1)
; LE-32BIT-NEXT:    stw 7, 32(1)
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    stw 3, 24(1)
; LE-32BIT-NEXT:    stw 3, 20(1)
; LE-32BIT-NEXT:    stw 3, 16(1)
; LE-32BIT-NEXT:    sub 3, 6, 4
; LE-32BIT-NEXT:    lwz 4, 4(3)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 7, 8(3)
; LE-32BIT-NEXT:    lwz 3, 12(3)
; LE-32BIT-NEXT:    stw 7, 8(5)
; LE-32BIT-NEXT:    stw 3, 12(5)
; LE-32BIT-NEXT:    stw 6, 0(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 48
; LE-32BIT-NEXT:    blr
  %src = load i128, ptr %src.ptr, align 1
  %wordOff = load i128, ptr %wordOff.ptr, align 1
  %bitOff = shl i128 %wordOff, 5
  %res = ashr i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}

define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: lshr_32bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    li 6, 16
; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
; LE-64BIT-NEXT:    xxlxor 2, 2, 2
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    li 8, 32
; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
; LE-64BIT-NEXT:    lwz 3, 0(4)
; LE-64BIT-NEXT:    li 4, 48
; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
; LE-64BIT-NEXT:    rlwinm 4, 3, 0, 27, 28
; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 26, 28
; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
; LE-64BIT-NEXT:    ldux 6, 4, 7
; LE-64BIT-NEXT:    subfic 7, 3, 64
; LE-64BIT-NEXT:    ld 8, 8(4)
; LE-64BIT-NEXT:    ld 9, 16(4)
; LE-64BIT-NEXT:    ld 4, 24(4)
; LE-64BIT-NEXT:    srd 6, 6, 3
; LE-64BIT-NEXT:    sld 10, 8, 7
; LE-64BIT-NEXT:    sld 11, 4, 7
; LE-64BIT-NEXT:    srd 8, 8, 3
; LE-64BIT-NEXT:    sld 7, 9, 7
; LE-64BIT-NEXT:    or 6, 10, 6
; LE-64BIT-NEXT:    srd 10, 9, 3
; LE-64BIT-NEXT:    srd 3, 4, 3
; LE-64BIT-NEXT:    or 7, 7, 8
; LE-64BIT-NEXT:    std 3, 24(5)
; LE-64BIT-NEXT:    or 3, 11, 10
; LE-64BIT-NEXT:    std 7, 8(5)
; LE-64BIT-NEXT:    std 6, 0(5)
; LE-64BIT-NEXT:    std 3, 16(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: lshr_32bytes:
; BE:       # %bb.0:
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    ld 7, 8(3)
; BE-NEXT:    ld 8, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    li 9, 0
; BE-NEXT:    addi 10, 1, -32
; BE-NEXT:    std 9, -40(1)
; BE-NEXT:    std 9, -48(1)
; BE-NEXT:    std 9, -56(1)
; BE-NEXT:    std 9, -64(1)
; BE-NEXT:    std 3, -8(1)
; BE-NEXT:    rlwinm 3, 4, 0, 27, 28
; BE-NEXT:    neg 3, 3
; BE-NEXT:    std 8, -16(1)
; BE-NEXT:    std 7, -24(1)
; BE-NEXT:    std 6, -32(1)
; BE-NEXT:    extsw 3, 3
; BE-NEXT:    ldux 3, 10, 3
; BE-NEXT:    rlwinm 4, 4, 3, 26, 28
; BE-NEXT:    subfic 9, 4, 64
; BE-NEXT:    ld 6, 8(10)
; BE-NEXT:    ld 7, 24(10)
; BE-NEXT:    ld 8, 16(10)
; BE-NEXT:    sld 10, 3, 9
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    srd 11, 6, 4
; BE-NEXT:    srd 7, 7, 4
; BE-NEXT:    sld 6, 6, 9
; BE-NEXT:    sld 9, 8, 9
; BE-NEXT:    srd 8, 8, 4
; BE-NEXT:    or 10, 10, 11
; BE-NEXT:    or 7, 9, 7
; BE-NEXT:    or 6, 6, 8
; BE-NEXT:    std 6, 16(5)
; BE-NEXT:    std 7, 24(5)
; BE-NEXT:    std 10, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: lshr_32bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -112(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 6, 44(1)
; LE-32BIT-NEXT:    stw 6, 40(1)
; LE-32BIT-NEXT:    stw 6, 36(1)
; LE-32BIT-NEXT:    stw 6, 32(1)
; LE-32BIT-NEXT:    stw 6, 28(1)
; LE-32BIT-NEXT:    stw 6, 24(1)
; LE-32BIT-NEXT:    stw 6, 20(1)
; LE-32BIT-NEXT:    stw 6, 16(1)
; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 27, 29
; LE-32BIT-NEXT:    stw 3, 76(1)
; LE-32BIT-NEXT:    addi 3, 1, 48
; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    sub 3, 3, 6
; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 0, 72(1)
; LE-32BIT-NEXT:    subfic 0, 4, 32
; LE-32BIT-NEXT:    stw 12, 68(1)
; LE-32BIT-NEXT:    stw 11, 64(1)
; LE-32BIT-NEXT:    stw 10, 60(1)
; LE-32BIT-NEXT:    stw 9, 56(1)
; LE-32BIT-NEXT:    stw 8, 52(1)
; LE-32BIT-NEXT:    stw 7, 48(1)
; LE-32BIT-NEXT:    lwz 6, 4(3)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    lwz 8, 12(3)
; LE-32BIT-NEXT:    srw 30, 6, 4
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    slw 29, 7, 0
; LE-32BIT-NEXT:    lwz 10, 20(3)
; LE-32BIT-NEXT:    srw 28, 8, 4
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    slw 27, 9, 0
; LE-32BIT-NEXT:    lwz 12, 28(3)
; LE-32BIT-NEXT:    slw 6, 6, 0
; LE-32BIT-NEXT:    lwz 3, 24(3)
; LE-32BIT-NEXT:    srw 26, 10, 4
; LE-32BIT-NEXT:    slw 25, 11, 0
; LE-32BIT-NEXT:    slw 8, 8, 0
; LE-32BIT-NEXT:    slw 10, 10, 0
; LE-32BIT-NEXT:    slw 0, 3, 0
; LE-32BIT-NEXT:    srw 3, 3, 4
; LE-32BIT-NEXT:    srw 12, 12, 4
; LE-32BIT-NEXT:    or 3, 10, 3
; LE-32BIT-NEXT:    srw 11, 11, 4
; LE-32BIT-NEXT:    stw 3, 24(5)
; LE-32BIT-NEXT:    or 3, 0, 12
; LE-32BIT-NEXT:    stw 3, 28(5)
; LE-32BIT-NEXT:    or 3, 8, 11
; LE-32BIT-NEXT:    srw 9, 9, 4
; LE-32BIT-NEXT:    stw 3, 16(5)
; LE-32BIT-NEXT:    or 3, 25, 26
; LE-32BIT-NEXT:    stw 3, 20(5)
; LE-32BIT-NEXT:    or 3, 6, 9
; LE-32BIT-NEXT:    stw 3, 8(5)
; LE-32BIT-NEXT:    or 3, 27, 28
; LE-32BIT-NEXT:    srw 4, 7, 4
; LE-32BIT-NEXT:    stw 3, 12(5)
; LE-32BIT-NEXT:    or 3, 29, 30
; LE-32BIT-NEXT:    stw 4, 0(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    addi 1, 1, 112
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %byteOff = load i256, ptr %byteOff.ptr, align 1
  %bitOff = shl i256 %byteOff, 3
  %res = lshr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: lshr_32bytes_wordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    li 6, 16
; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
; LE-64BIT-NEXT:    xxlxor 2, 2, 2
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    li 8, 32
; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
; LE-64BIT-NEXT:    lwz 3, 0(4)
; LE-64BIT-NEXT:    li 4, 48
; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
; LE-64BIT-NEXT:    rlwinm 4, 3, 2, 27, 28
; LE-64BIT-NEXT:    rlwinm 3, 3, 5, 26, 26
; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
; LE-64BIT-NEXT:    ldux 6, 4, 7
; LE-64BIT-NEXT:    subfic 7, 3, 64
; LE-64BIT-NEXT:    ld 8, 8(4)
; LE-64BIT-NEXT:    ld 9, 16(4)
; LE-64BIT-NEXT:    ld 4, 24(4)
; LE-64BIT-NEXT:    srd 6, 6, 3
; LE-64BIT-NEXT:    sld 10, 8, 7
; LE-64BIT-NEXT:    sld 11, 4, 7
; LE-64BIT-NEXT:    srd 8, 8, 3
; LE-64BIT-NEXT:    sld 7, 9, 7
; LE-64BIT-NEXT:    or 6, 10, 6
; LE-64BIT-NEXT:    srd 10, 9, 3
; LE-64BIT-NEXT:    srd 3, 4, 3
; LE-64BIT-NEXT:    or 7, 7, 8
; LE-64BIT-NEXT:    std 3, 24(5)
; LE-64BIT-NEXT:    or 3, 11, 10
; LE-64BIT-NEXT:    std 7, 8(5)
; LE-64BIT-NEXT:    std 6, 0(5)
; LE-64BIT-NEXT:    std 3, 16(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: lshr_32bytes_wordOff:
; BE:       # %bb.0:
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    ld 7, 8(3)
; BE-NEXT:    ld 8, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    li 9, 0
; BE-NEXT:    addi 10, 1, -32
; BE-NEXT:    std 9, -40(1)
; BE-NEXT:    std 9, -48(1)
; BE-NEXT:    std 9, -56(1)
; BE-NEXT:    std 9, -64(1)
; BE-NEXT:    std 3, -8(1)
; BE-NEXT:    rlwinm 3, 4, 2, 27, 28
; BE-NEXT:    neg 3, 3
; BE-NEXT:    std 8, -16(1)
; BE-NEXT:    std 7, -24(1)
; BE-NEXT:    std 6, -32(1)
; BE-NEXT:    extsw 3, 3
; BE-NEXT:    ldux 3, 10, 3
; BE-NEXT:    rlwinm 4, 4, 5, 26, 26
; BE-NEXT:    subfic 9, 4, 64
; BE-NEXT:    ld 6, 8(10)
; BE-NEXT:    ld 7, 24(10)
; BE-NEXT:    ld 8, 16(10)
; BE-NEXT:    sld 10, 3, 9
; BE-NEXT:    srd 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    srd 11, 6, 4
; BE-NEXT:    srd 7, 7, 4
; BE-NEXT:    sld 6, 6, 9
; BE-NEXT:    sld 9, 8, 9
; BE-NEXT:    srd 8, 8, 4
; BE-NEXT:    or 10, 10, 11
; BE-NEXT:    or 7, 9, 7
; BE-NEXT:    or 6, 6, 8
; BE-NEXT:    std 6, 16(5)
; BE-NEXT:    std 7, 24(5)
; BE-NEXT:    std 10, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: lshr_32bytes_wordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -80(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 3, 76(1)
; LE-32BIT-NEXT:    addi 3, 1, 48
; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 27, 29
; LE-32BIT-NEXT:    stw 6, 44(1)
; LE-32BIT-NEXT:    sub 3, 3, 4
; LE-32BIT-NEXT:    stw 6, 40(1)
; LE-32BIT-NEXT:    stw 6, 36(1)
; LE-32BIT-NEXT:    stw 6, 32(1)
; LE-32BIT-NEXT:    stw 6, 28(1)
; LE-32BIT-NEXT:    stw 6, 24(1)
; LE-32BIT-NEXT:    stw 6, 20(1)
; LE-32BIT-NEXT:    stw 6, 16(1)
; LE-32BIT-NEXT:    stw 0, 72(1)
; LE-32BIT-NEXT:    stw 12, 68(1)
; LE-32BIT-NEXT:    stw 11, 64(1)
; LE-32BIT-NEXT:    stw 10, 60(1)
; LE-32BIT-NEXT:    stw 9, 56(1)
; LE-32BIT-NEXT:    stw 8, 52(1)
; LE-32BIT-NEXT:    stw 7, 48(1)
; LE-32BIT-NEXT:    lwz 4, 4(3)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 7, 12(3)
; LE-32BIT-NEXT:    lwz 8, 8(3)
; LE-32BIT-NEXT:    lwz 9, 20(3)
; LE-32BIT-NEXT:    lwz 10, 16(3)
; LE-32BIT-NEXT:    lwz 11, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    stw 11, 24(5)
; LE-32BIT-NEXT:    stw 3, 28(5)
; LE-32BIT-NEXT:    stw 10, 16(5)
; LE-32BIT-NEXT:    stw 9, 20(5)
; LE-32BIT-NEXT:    stw 8, 8(5)
; LE-32BIT-NEXT:    stw 7, 12(5)
; LE-32BIT-NEXT:    stw 6, 0(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 80
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %wordOff = load i256, ptr %wordOff.ptr, align 1
  %bitOff = shl i256 %wordOff, 5
  %res = lshr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: lshr_32bytes_dwordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    li 6, 16
; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
; LE-64BIT-NEXT:    xxlxor 2, 2, 2
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
; LE-64BIT-NEXT:    lwz 3, 0(4)
; LE-64BIT-NEXT:    li 4, 48
; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
; LE-64BIT-NEXT:    li 4, 32
; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
; LE-64BIT-NEXT:    lxvd2x 0, 7, 3
; LE-64BIT-NEXT:    add 3, 7, 3
; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
; LE-64BIT-NEXT:    stxvd2x 1, 5, 6
; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: lshr_32bytes_dwordOff:
; BE:       # %bb.0:
; BE-NEXT:    ld 7, 0(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 9, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    li 6, 0
; BE-NEXT:    std 6, -40(1)
; BE-NEXT:    std 6, -48(1)
; BE-NEXT:    std 6, -56(1)
; BE-NEXT:    std 6, -64(1)
; BE-NEXT:    std 3, -8(1)
; BE-NEXT:    rlwinm 3, 4, 3, 27, 28
; BE-NEXT:    neg 3, 3
; BE-NEXT:    std 9, -16(1)
; BE-NEXT:    std 8, -24(1)
; BE-NEXT:    std 7, -32(1)
; BE-NEXT:    extsw 3, 3
; BE-NEXT:    addi 4, 1, -32
; BE-NEXT:    ldux 3, 4, 3
; BE-NEXT:    ld 6, 8(4)
; BE-NEXT:    ld 7, 24(4)
; BE-NEXT:    ld 4, 16(4)
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    std 4, 16(5)
; BE-NEXT:    std 7, 24(5)
; BE-NEXT:    std 6, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: lshr_32bytes_dwordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -80(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 3, 76(1)
; LE-32BIT-NEXT:    addi 3, 1, 48
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 6, 44(1)
; LE-32BIT-NEXT:    sub 3, 3, 4
; LE-32BIT-NEXT:    stw 6, 40(1)
; LE-32BIT-NEXT:    stw 6, 36(1)
; LE-32BIT-NEXT:    stw 6, 32(1)
; LE-32BIT-NEXT:    stw 6, 28(1)
; LE-32BIT-NEXT:    stw 6, 24(1)
; LE-32BIT-NEXT:    stw 6, 20(1)
; LE-32BIT-NEXT:    stw 6, 16(1)
; LE-32BIT-NEXT:    stw 0, 72(1)
; LE-32BIT-NEXT:    stw 12, 68(1)
; LE-32BIT-NEXT:    stw 11, 64(1)
; LE-32BIT-NEXT:    stw 10, 60(1)
; LE-32BIT-NEXT:    stw 9, 56(1)
; LE-32BIT-NEXT:    stw 8, 52(1)
; LE-32BIT-NEXT:    stw 7, 48(1)
; LE-32BIT-NEXT:    lwz 4, 4(3)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 7, 12(3)
; LE-32BIT-NEXT:    lwz 8, 8(3)
; LE-32BIT-NEXT:    lwz 9, 20(3)
; LE-32BIT-NEXT:    lwz 10, 16(3)
; LE-32BIT-NEXT:    lwz 11, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    stw 11, 24(5)
; LE-32BIT-NEXT:    stw 3, 28(5)
; LE-32BIT-NEXT:    stw 10, 16(5)
; LE-32BIT-NEXT:    stw 9, 20(5)
; LE-32BIT-NEXT:    stw 8, 8(5)
; LE-32BIT-NEXT:    stw 7, 12(5)
; LE-32BIT-NEXT:    stw 6, 0(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 80
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %dwordOff = load i256, ptr %dwordOff.ptr, align 1
  %bitOff = shl i256 %dwordOff, 6
  %res = lshr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: shl_32bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    li 6, 16
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    xxlxor 2, 2, 2
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
; LE-64BIT-NEXT:    addi 8, 1, -32
; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
; LE-64BIT-NEXT:    stxvd2x 2, 7, 6
; LE-64BIT-NEXT:    li 6, 48
; LE-64BIT-NEXT:    rlwinm 3, 4, 0, 27, 28
; LE-64BIT-NEXT:    rlwinm 4, 4, 3, 26, 28
; LE-64BIT-NEXT:    neg 3, 3
; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
; LE-64BIT-NEXT:    li 6, 32
; LE-64BIT-NEXT:    extsw 3, 3
; LE-64BIT-NEXT:    stxvd2x 1, 7, 6
; LE-64BIT-NEXT:    stxvd2x 2, 0, 7
; LE-64BIT-NEXT:    subfic 6, 4, 64
; LE-64BIT-NEXT:    ldux 3, 8, 3
; LE-64BIT-NEXT:    ld 7, 16(8)
; LE-64BIT-NEXT:    ld 9, 24(8)
; LE-64BIT-NEXT:    ld 8, 8(8)
; LE-64BIT-NEXT:    srd 10, 7, 6
; LE-64BIT-NEXT:    sld 9, 9, 4
; LE-64BIT-NEXT:    sld 7, 7, 4
; LE-64BIT-NEXT:    or 9, 9, 10
; LE-64BIT-NEXT:    srd 10, 8, 6
; LE-64BIT-NEXT:    srd 6, 3, 6
; LE-64BIT-NEXT:    sld 8, 8, 4
; LE-64BIT-NEXT:    sld 3, 3, 4
; LE-64BIT-NEXT:    or 6, 8, 6
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    or 3, 7, 10
; LE-64BIT-NEXT:    std 9, 24(5)
; LE-64BIT-NEXT:    std 6, 8(5)
; LE-64BIT-NEXT:    std 3, 16(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: shl_32bytes:
; BE:       # %bb.0:
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    ld 7, 8(3)
; BE-NEXT:    ld 8, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    li 9, 0
; BE-NEXT:    addi 10, 1, -64
; BE-NEXT:    std 9, -8(1)
; BE-NEXT:    std 9, -16(1)
; BE-NEXT:    std 9, -24(1)
; BE-NEXT:    std 9, -32(1)
; BE-NEXT:    std 3, -40(1)
; BE-NEXT:    std 8, -48(1)
; BE-NEXT:    std 7, -56(1)
; BE-NEXT:    std 6, -64(1)
; BE-NEXT:    rlwinm 3, 4, 0, 27, 28
; BE-NEXT:    ldux 6, 3, 10
; BE-NEXT:    rlwinm 4, 4, 3, 26, 28
; BE-NEXT:    subfic 9, 4, 64
; BE-NEXT:    ld 7, 16(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    sld 6, 6, 4
; BE-NEXT:    srd 10, 7, 9
; BE-NEXT:    sld 11, 8, 4
; BE-NEXT:    srd 8, 8, 9
; BE-NEXT:    srd 9, 3, 9
; BE-NEXT:    sld 7, 7, 4
; BE-NEXT:    sld 3, 3, 4
; BE-NEXT:    or 10, 11, 10
; BE-NEXT:    or 6, 6, 8
; BE-NEXT:    or 7, 7, 9
; BE-NEXT:    std 3, 24(5)
; BE-NEXT:    std 7, 16(5)
; BE-NEXT:    std 6, 0(5)
; BE-NEXT:    std 10, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: shl_32bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -112(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 6, 76(1)
; LE-32BIT-NEXT:    stw 6, 72(1)
; LE-32BIT-NEXT:    stw 6, 68(1)
; LE-32BIT-NEXT:    stw 6, 64(1)
; LE-32BIT-NEXT:    stw 6, 60(1)
; LE-32BIT-NEXT:    stw 6, 56(1)
; LE-32BIT-NEXT:    stw 6, 52(1)
; LE-32BIT-NEXT:    stw 6, 48(1)
; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 27, 29
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    addi 3, 1, 16
; LE-32BIT-NEXT:    stw 0, 40(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 12, 36(1)
; LE-32BIT-NEXT:    subfic 12, 4, 32
; LE-32BIT-NEXT:    stw 11, 32(1)
; LE-32BIT-NEXT:    stw 10, 28(1)
; LE-32BIT-NEXT:    stw 9, 24(1)
; LE-32BIT-NEXT:    stw 8, 20(1)
; LE-32BIT-NEXT:    stw 7, 16(1)
; LE-32BIT-NEXT:    lwzux 3, 6, 3
; LE-32BIT-NEXT:    lwz 7, 8(6)
; LE-32BIT-NEXT:    slw 3, 3, 4
; LE-32BIT-NEXT:    lwz 8, 4(6)
; LE-32BIT-NEXT:    lwz 9, 16(6)
; LE-32BIT-NEXT:    srw 30, 7, 12
; LE-32BIT-NEXT:    lwz 10, 12(6)
; LE-32BIT-NEXT:    slw 29, 8, 4
; LE-32BIT-NEXT:    lwz 11, 24(6)
; LE-32BIT-NEXT:    srw 8, 8, 12
; LE-32BIT-NEXT:    lwz 0, 20(6)
; LE-32BIT-NEXT:    srw 28, 9, 12
; LE-32BIT-NEXT:    lwz 6, 28(6)
; LE-32BIT-NEXT:    slw 27, 10, 4
; LE-32BIT-NEXT:    srw 10, 10, 12
; LE-32BIT-NEXT:    slw 7, 7, 4
; LE-32BIT-NEXT:    srw 26, 11, 12
; LE-32BIT-NEXT:    slw 25, 0, 4
; LE-32BIT-NEXT:    srw 0, 0, 12
; LE-32BIT-NEXT:    slw 9, 9, 4
; LE-32BIT-NEXT:    srw 12, 6, 12
; LE-32BIT-NEXT:    slw 11, 11, 4
; LE-32BIT-NEXT:    slw 4, 6, 4
; LE-32BIT-NEXT:    stw 4, 28(5)
; LE-32BIT-NEXT:    or 4, 11, 12
; LE-32BIT-NEXT:    stw 4, 24(5)
; LE-32BIT-NEXT:    or 4, 9, 0
; LE-32BIT-NEXT:    stw 4, 16(5)
; LE-32BIT-NEXT:    or 4, 25, 26
; LE-32BIT-NEXT:    stw 4, 20(5)
; LE-32BIT-NEXT:    or 4, 7, 10
; LE-32BIT-NEXT:    or 3, 3, 8
; LE-32BIT-NEXT:    stw 4, 8(5)
; LE-32BIT-NEXT:    or 4, 27, 28
; LE-32BIT-NEXT:    stw 3, 0(5)
; LE-32BIT-NEXT:    or 3, 29, 30
; LE-32BIT-NEXT:    stw 4, 12(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    addi 1, 1, 112
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %byteOff = load i256, ptr %byteOff.ptr, align 1
  %bitOff = shl i256 %byteOff, 3
  %res = shl i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: shl_32bytes_wordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    li 6, 16
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    xxlxor 2, 2, 2
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
; LE-64BIT-NEXT:    addi 8, 1, -32
; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
; LE-64BIT-NEXT:    stxvd2x 2, 7, 6
; LE-64BIT-NEXT:    li 6, 48
; LE-64BIT-NEXT:    rlwinm 3, 4, 2, 27, 28
; LE-64BIT-NEXT:    rlwinm 4, 4, 5, 26, 26
; LE-64BIT-NEXT:    neg 3, 3
; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
; LE-64BIT-NEXT:    li 6, 32
; LE-64BIT-NEXT:    extsw 3, 3
; LE-64BIT-NEXT:    stxvd2x 1, 7, 6
; LE-64BIT-NEXT:    stxvd2x 2, 0, 7
; LE-64BIT-NEXT:    subfic 6, 4, 64
; LE-64BIT-NEXT:    ldux 3, 8, 3
; LE-64BIT-NEXT:    ld 7, 16(8)
; LE-64BIT-NEXT:    ld 9, 24(8)
; LE-64BIT-NEXT:    ld 8, 8(8)
; LE-64BIT-NEXT:    srd 10, 7, 6
; LE-64BIT-NEXT:    sld 9, 9, 4
; LE-64BIT-NEXT:    sld 7, 7, 4
; LE-64BIT-NEXT:    or 9, 9, 10
; LE-64BIT-NEXT:    srd 10, 8, 6
; LE-64BIT-NEXT:    srd 6, 3, 6
; LE-64BIT-NEXT:    sld 8, 8, 4
; LE-64BIT-NEXT:    sld 3, 3, 4
; LE-64BIT-NEXT:    or 6, 8, 6
; LE-64BIT-NEXT:    std 3, 0(5)
; LE-64BIT-NEXT:    or 3, 7, 10
; LE-64BIT-NEXT:    std 9, 24(5)
; LE-64BIT-NEXT:    std 6, 8(5)
; LE-64BIT-NEXT:    std 3, 16(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: shl_32bytes_wordOff:
; BE:       # %bb.0:
; BE-NEXT:    ld 6, 0(3)
; BE-NEXT:    ld 7, 8(3)
; BE-NEXT:    ld 8, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    li 9, 0
; BE-NEXT:    addi 10, 1, -64
; BE-NEXT:    std 9, -8(1)
; BE-NEXT:    std 9, -16(1)
; BE-NEXT:    std 9, -24(1)
; BE-NEXT:    std 9, -32(1)
; BE-NEXT:    std 3, -40(1)
; BE-NEXT:    std 8, -48(1)
; BE-NEXT:    std 7, -56(1)
; BE-NEXT:    std 6, -64(1)
; BE-NEXT:    rlwinm 3, 4, 2, 27, 28
; BE-NEXT:    ldux 6, 3, 10
; BE-NEXT:    rlwinm 4, 4, 5, 26, 26
; BE-NEXT:    subfic 9, 4, 64
; BE-NEXT:    ld 7, 16(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    sld 6, 6, 4
; BE-NEXT:    srd 10, 7, 9
; BE-NEXT:    sld 11, 8, 4
; BE-NEXT:    srd 8, 8, 9
; BE-NEXT:    srd 9, 3, 9
; BE-NEXT:    sld 7, 7, 4
; BE-NEXT:    sld 3, 3, 4
; BE-NEXT:    or 10, 11, 10
; BE-NEXT:    or 6, 6, 8
; BE-NEXT:    or 7, 7, 9
; BE-NEXT:    std 3, 24(5)
; BE-NEXT:    std 7, 16(5)
; BE-NEXT:    std 6, 0(5)
; BE-NEXT:    std 10, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: shl_32bytes_wordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -80(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 6, 76(1)
; LE-32BIT-NEXT:    stw 6, 72(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 27, 29
; LE-32BIT-NEXT:    stw 6, 68(1)
; LE-32BIT-NEXT:    stw 6, 64(1)
; LE-32BIT-NEXT:    stw 6, 60(1)
; LE-32BIT-NEXT:    stw 6, 56(1)
; LE-32BIT-NEXT:    stw 6, 52(1)
; LE-32BIT-NEXT:    stw 6, 48(1)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    addi 3, 1, 16
; LE-32BIT-NEXT:    stw 0, 40(1)
; LE-32BIT-NEXT:    stw 12, 36(1)
; LE-32BIT-NEXT:    stw 11, 32(1)
; LE-32BIT-NEXT:    stw 10, 28(1)
; LE-32BIT-NEXT:    stw 9, 24(1)
; LE-32BIT-NEXT:    stw 8, 20(1)
; LE-32BIT-NEXT:    stw 7, 16(1)
; LE-32BIT-NEXT:    lwzux 3, 4, 3
; LE-32BIT-NEXT:    lwz 6, 4(4)
; LE-32BIT-NEXT:    lwz 7, 12(4)
; LE-32BIT-NEXT:    lwz 8, 8(4)
; LE-32BIT-NEXT:    lwz 9, 20(4)
; LE-32BIT-NEXT:    lwz 10, 16(4)
; LE-32BIT-NEXT:    lwz 11, 28(4)
; LE-32BIT-NEXT:    lwz 4, 24(4)
; LE-32BIT-NEXT:    stw 3, 0(5)
; LE-32BIT-NEXT:    stw 4, 24(5)
; LE-32BIT-NEXT:    stw 11, 28(5)
; LE-32BIT-NEXT:    stw 10, 16(5)
; LE-32BIT-NEXT:    stw 9, 20(5)
; LE-32BIT-NEXT:    stw 8, 8(5)
; LE-32BIT-NEXT:    stw 7, 12(5)
; LE-32BIT-NEXT:    stw 6, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 80
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %wordOff = load i256, ptr %wordOff.ptr, align 1
  %bitOff = shl i256 %wordOff, 5
  %res = shl i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: shl_32bytes_dwordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    li 6, 16
; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
; LE-64BIT-NEXT:    xxlxor 2, 2, 2
; LE-64BIT-NEXT:    li 7, 48
; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
; LE-64BIT-NEXT:    lwz 3, 0(4)
; LE-64BIT-NEXT:    addi 4, 1, -64
; LE-64BIT-NEXT:    stxvd2x 2, 4, 6
; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT:    stxvd2x 0, 4, 7
; LE-64BIT-NEXT:    li 7, 32
; LE-64BIT-NEXT:    neg 3, 3
; LE-64BIT-NEXT:    stxvd2x 1, 4, 7
; LE-64BIT-NEXT:    stxvd2x 2, 0, 4
; LE-64BIT-NEXT:    extsw 3, 3
; LE-64BIT-NEXT:    addi 4, 1, -32
; LE-64BIT-NEXT:    lxvd2x 0, 4, 3
; LE-64BIT-NEXT:    add 3, 4, 3
; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
; LE-64BIT-NEXT:    stxvd2x 1, 5, 6
; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: shl_32bytes_dwordOff:
; BE:       # %bb.0:
; BE-NEXT:    ld 7, 0(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 9, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    li 6, 0
; BE-NEXT:    std 6, -8(1)
; BE-NEXT:    std 6, -16(1)
; BE-NEXT:    std 6, -24(1)
; BE-NEXT:    std 6, -32(1)
; BE-NEXT:    std 3, -40(1)
; BE-NEXT:    std 9, -48(1)
; BE-NEXT:    std 8, -56(1)
; BE-NEXT:    std 7, -64(1)
; BE-NEXT:    rlwinm 3, 4, 3, 27, 28
; BE-NEXT:    addi 4, 1, -64
; BE-NEXT:    ldux 4, 3, 4
; BE-NEXT:    ld 6, 8(3)
; BE-NEXT:    ld 7, 24(3)
; BE-NEXT:    ld 3, 16(3)
; BE-NEXT:    std 4, 0(5)
; BE-NEXT:    std 3, 16(5)
; BE-NEXT:    std 7, 24(5)
; BE-NEXT:    std 6, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: shl_32bytes_dwordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -80(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    li 6, 0
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 6, 76(1)
; LE-32BIT-NEXT:    stw 6, 72(1)
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 6, 68(1)
; LE-32BIT-NEXT:    stw 6, 64(1)
; LE-32BIT-NEXT:    stw 6, 60(1)
; LE-32BIT-NEXT:    stw 6, 56(1)
; LE-32BIT-NEXT:    stw 6, 52(1)
; LE-32BIT-NEXT:    stw 6, 48(1)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    addi 3, 1, 16
; LE-32BIT-NEXT:    stw 0, 40(1)
; LE-32BIT-NEXT:    stw 12, 36(1)
; LE-32BIT-NEXT:    stw 11, 32(1)
; LE-32BIT-NEXT:    stw 10, 28(1)
; LE-32BIT-NEXT:    stw 9, 24(1)
; LE-32BIT-NEXT:    stw 8, 20(1)
; LE-32BIT-NEXT:    stw 7, 16(1)
; LE-32BIT-NEXT:    lwzux 3, 4, 3
; LE-32BIT-NEXT:    lwz 6, 12(4)
; LE-32BIT-NEXT:    lwz 7, 8(4)
; LE-32BIT-NEXT:    lwz 8, 20(4)
; LE-32BIT-NEXT:    lwz 9, 16(4)
; LE-32BIT-NEXT:    lwz 10, 28(4)
; LE-32BIT-NEXT:    lwz 11, 24(4)
; LE-32BIT-NEXT:    ori 4, 4, 4
; LE-32BIT-NEXT:    lwz 4, 0(4)
; LE-32BIT-NEXT:    stw 3, 0(5)
; LE-32BIT-NEXT:    stw 11, 24(5)
; LE-32BIT-NEXT:    stw 10, 28(5)
; LE-32BIT-NEXT:    stw 9, 16(5)
; LE-32BIT-NEXT:    stw 8, 20(5)
; LE-32BIT-NEXT:    stw 7, 8(5)
; LE-32BIT-NEXT:    stw 6, 12(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 80
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %dwordOff = load i256, ptr %dwordOff.ptr, align 1
  %bitOff = shl i256 %dwordOff, 6
  %res = shl i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}


define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_32bytes:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    ld 6, 24(3)
; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    ld 3, 16(3)
; LE-64BIT-NEXT:    sradi 8, 6, 63
; LE-64BIT-NEXT:    rlwinm 9, 4, 0, 27, 28
; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
; LE-64BIT-NEXT:    std 6, -40(1)
; LE-64BIT-NEXT:    std 3, -48(1)
; LE-64BIT-NEXT:    std 8, -8(1)
; LE-64BIT-NEXT:    std 8, -16(1)
; LE-64BIT-NEXT:    std 8, -24(1)
; LE-64BIT-NEXT:    std 8, -32(1)
; LE-64BIT-NEXT:    rlwinm 3, 4, 3, 26, 28
; LE-64BIT-NEXT:    ldux 4, 9, 7
; LE-64BIT-NEXT:    ld 7, 8(9)
; LE-64BIT-NEXT:    subfic 6, 3, 64
; LE-64BIT-NEXT:    ld 8, 16(9)
; LE-64BIT-NEXT:    ld 9, 24(9)
; LE-64BIT-NEXT:    srd 4, 4, 3
; LE-64BIT-NEXT:    sld 10, 7, 6
; LE-64BIT-NEXT:    sld 11, 9, 6
; LE-64BIT-NEXT:    srd 7, 7, 3
; LE-64BIT-NEXT:    sld 6, 8, 6
; LE-64BIT-NEXT:    or 4, 10, 4
; LE-64BIT-NEXT:    srd 10, 8, 3
; LE-64BIT-NEXT:    srad 3, 9, 3
; LE-64BIT-NEXT:    or 6, 6, 7
; LE-64BIT-NEXT:    std 3, 24(5)
; LE-64BIT-NEXT:    or 3, 11, 10
; LE-64BIT-NEXT:    std 6, 8(5)
; LE-64BIT-NEXT:    std 4, 0(5)
; LE-64BIT-NEXT:    std 3, 16(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: ashr_32bytes:
; BE:       # %bb.0:
; BE-NEXT:    ld 7, 0(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 9, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    addi 6, 1, -32
; BE-NEXT:    std 3, -8(1)
; BE-NEXT:    std 7, -32(1)
; BE-NEXT:    sradi 3, 7, 63
; BE-NEXT:    rlwinm 7, 4, 0, 27, 28
; BE-NEXT:    std 3, -40(1)
; BE-NEXT:    std 3, -48(1)
; BE-NEXT:    std 3, -56(1)
; BE-NEXT:    std 3, -64(1)
; BE-NEXT:    neg 3, 7
; BE-NEXT:    std 9, -16(1)
; BE-NEXT:    std 8, -24(1)
; BE-NEXT:    extsw 3, 3
; BE-NEXT:    ldux 3, 6, 3
; BE-NEXT:    rlwinm 4, 4, 3, 26, 28
; BE-NEXT:    subfic 9, 4, 64
; BE-NEXT:    ld 7, 8(6)
; BE-NEXT:    ld 8, 24(6)
; BE-NEXT:    ld 6, 16(6)
; BE-NEXT:    sld 10, 3, 9
; BE-NEXT:    srad 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    srd 11, 7, 4
; BE-NEXT:    srd 8, 8, 4
; BE-NEXT:    sld 7, 7, 9
; BE-NEXT:    sld 9, 6, 9
; BE-NEXT:    srd 6, 6, 4
; BE-NEXT:    or 10, 10, 11
; BE-NEXT:    or 8, 9, 8
; BE-NEXT:    or 6, 7, 6
; BE-NEXT:    std 6, 16(5)
; BE-NEXT:    std 8, 24(5)
; BE-NEXT:    std 10, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: ashr_32bytes:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -112(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    addi 6, 1, 48
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 3, 76(1)
; LE-32BIT-NEXT:    srawi 3, 7, 31
; LE-32BIT-NEXT:    stw 7, 48(1)
; LE-32BIT-NEXT:    rlwinm 7, 4, 0, 27, 29
; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
; LE-32BIT-NEXT:    stw 0, 72(1)
; LE-32BIT-NEXT:    subfic 0, 4, 32
; LE-32BIT-NEXT:    stw 12, 68(1)
; LE-32BIT-NEXT:    stw 11, 64(1)
; LE-32BIT-NEXT:    stw 10, 60(1)
; LE-32BIT-NEXT:    stw 9, 56(1)
; LE-32BIT-NEXT:    stw 8, 52(1)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    stw 3, 40(1)
; LE-32BIT-NEXT:    stw 3, 36(1)
; LE-32BIT-NEXT:    stw 3, 32(1)
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    stw 3, 24(1)
; LE-32BIT-NEXT:    stw 3, 20(1)
; LE-32BIT-NEXT:    stw 3, 16(1)
; LE-32BIT-NEXT:    sub 3, 6, 7
; LE-32BIT-NEXT:    lwz 6, 4(3)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    lwz 8, 12(3)
; LE-32BIT-NEXT:    srw 30, 6, 4
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    slw 29, 7, 0
; LE-32BIT-NEXT:    lwz 10, 20(3)
; LE-32BIT-NEXT:    srw 28, 8, 4
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    slw 27, 9, 0
; LE-32BIT-NEXT:    lwz 12, 28(3)
; LE-32BIT-NEXT:    slw 6, 6, 0
; LE-32BIT-NEXT:    lwz 3, 24(3)
; LE-32BIT-NEXT:    srw 26, 10, 4
; LE-32BIT-NEXT:    slw 25, 11, 0
; LE-32BIT-NEXT:    slw 8, 8, 0
; LE-32BIT-NEXT:    slw 10, 10, 0
; LE-32BIT-NEXT:    slw 0, 3, 0
; LE-32BIT-NEXT:    srw 3, 3, 4
; LE-32BIT-NEXT:    srw 12, 12, 4
; LE-32BIT-NEXT:    or 3, 10, 3
; LE-32BIT-NEXT:    srw 11, 11, 4
; LE-32BIT-NEXT:    stw 3, 24(5)
; LE-32BIT-NEXT:    or 3, 0, 12
; LE-32BIT-NEXT:    stw 3, 28(5)
; LE-32BIT-NEXT:    or 3, 8, 11
; LE-32BIT-NEXT:    srw 9, 9, 4
; LE-32BIT-NEXT:    stw 3, 16(5)
; LE-32BIT-NEXT:    or 3, 25, 26
; LE-32BIT-NEXT:    stw 3, 20(5)
; LE-32BIT-NEXT:    or 3, 6, 9
; LE-32BIT-NEXT:    stw 3, 8(5)
; LE-32BIT-NEXT:    or 3, 27, 28
; LE-32BIT-NEXT:    sraw 4, 7, 4
; LE-32BIT-NEXT:    stw 3, 12(5)
; LE-32BIT-NEXT:    or 3, 29, 30
; LE-32BIT-NEXT:    stw 4, 0(5)
; LE-32BIT-NEXT:    stw 3, 4(5)
; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
; LE-32BIT-NEXT:    addi 1, 1, 112
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %byteOff = load i256, ptr %byteOff.ptr, align 1
  %bitOff = shl i256 %byteOff, 3
  %res = ashr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_32bytes_wordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    ld 6, 24(3)
; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
; LE-64BIT-NEXT:    lwz 4, 0(4)
; LE-64BIT-NEXT:    addi 7, 1, -64
; LE-64BIT-NEXT:    ld 3, 16(3)
; LE-64BIT-NEXT:    sradi 8, 6, 63
; LE-64BIT-NEXT:    rlwinm 9, 4, 2, 27, 28
; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
; LE-64BIT-NEXT:    std 6, -40(1)
; LE-64BIT-NEXT:    std 3, -48(1)
; LE-64BIT-NEXT:    std 8, -8(1)
; LE-64BIT-NEXT:    std 8, -16(1)
; LE-64BIT-NEXT:    std 8, -24(1)
; LE-64BIT-NEXT:    std 8, -32(1)
; LE-64BIT-NEXT:    rlwinm 3, 4, 5, 26, 26
; LE-64BIT-NEXT:    ldux 4, 9, 7
; LE-64BIT-NEXT:    ld 7, 8(9)
; LE-64BIT-NEXT:    subfic 6, 3, 64
; LE-64BIT-NEXT:    ld 8, 16(9)
; LE-64BIT-NEXT:    ld 9, 24(9)
; LE-64BIT-NEXT:    srd 4, 4, 3
; LE-64BIT-NEXT:    sld 10, 7, 6
; LE-64BIT-NEXT:    sld 11, 9, 6
; LE-64BIT-NEXT:    srd 7, 7, 3
; LE-64BIT-NEXT:    sld 6, 8, 6
; LE-64BIT-NEXT:    or 4, 10, 4
; LE-64BIT-NEXT:    srd 10, 8, 3
; LE-64BIT-NEXT:    srad 3, 9, 3
; LE-64BIT-NEXT:    or 6, 6, 7
; LE-64BIT-NEXT:    std 3, 24(5)
; LE-64BIT-NEXT:    or 3, 11, 10
; LE-64BIT-NEXT:    std 6, 8(5)
; LE-64BIT-NEXT:    std 4, 0(5)
; LE-64BIT-NEXT:    std 3, 16(5)
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: ashr_32bytes_wordOff:
; BE:       # %bb.0:
; BE-NEXT:    ld 7, 0(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 9, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    addi 6, 1, -32
; BE-NEXT:    std 3, -8(1)
; BE-NEXT:    std 7, -32(1)
; BE-NEXT:    sradi 3, 7, 63
; BE-NEXT:    rlwinm 7, 4, 2, 27, 28
; BE-NEXT:    std 3, -40(1)
; BE-NEXT:    std 3, -48(1)
; BE-NEXT:    std 3, -56(1)
; BE-NEXT:    std 3, -64(1)
; BE-NEXT:    neg 3, 7
; BE-NEXT:    std 9, -16(1)
; BE-NEXT:    std 8, -24(1)
; BE-NEXT:    extsw 3, 3
; BE-NEXT:    ldux 3, 6, 3
; BE-NEXT:    rlwinm 4, 4, 5, 26, 26
; BE-NEXT:    subfic 9, 4, 64
; BE-NEXT:    ld 7, 8(6)
; BE-NEXT:    ld 8, 24(6)
; BE-NEXT:    ld 6, 16(6)
; BE-NEXT:    sld 10, 3, 9
; BE-NEXT:    srad 3, 3, 4
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    srd 11, 7, 4
; BE-NEXT:    srd 8, 8, 4
; BE-NEXT:    sld 7, 7, 9
; BE-NEXT:    sld 9, 6, 9
; BE-NEXT:    srd 6, 6, 4
; BE-NEXT:    or 10, 10, 11
; BE-NEXT:    or 8, 9, 8
; BE-NEXT:    or 6, 7, 6
; BE-NEXT:    std 6, 16(5)
; BE-NEXT:    std 8, 24(5)
; BE-NEXT:    std 10, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: ashr_32bytes_wordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -80(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    addi 6, 1, 48
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 3, 76(1)
; LE-32BIT-NEXT:    srawi 3, 7, 31
; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 27, 29
; LE-32BIT-NEXT:    stw 0, 72(1)
; LE-32BIT-NEXT:    stw 12, 68(1)
; LE-32BIT-NEXT:    stw 11, 64(1)
; LE-32BIT-NEXT:    stw 10, 60(1)
; LE-32BIT-NEXT:    stw 9, 56(1)
; LE-32BIT-NEXT:    stw 8, 52(1)
; LE-32BIT-NEXT:    stw 7, 48(1)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    stw 3, 40(1)
; LE-32BIT-NEXT:    stw 3, 36(1)
; LE-32BIT-NEXT:    stw 3, 32(1)
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    stw 3, 24(1)
; LE-32BIT-NEXT:    stw 3, 20(1)
; LE-32BIT-NEXT:    stw 3, 16(1)
; LE-32BIT-NEXT:    sub 3, 6, 4
; LE-32BIT-NEXT:    lwz 4, 4(3)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 7, 12(3)
; LE-32BIT-NEXT:    lwz 8, 8(3)
; LE-32BIT-NEXT:    lwz 9, 20(3)
; LE-32BIT-NEXT:    lwz 10, 16(3)
; LE-32BIT-NEXT:    lwz 11, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    stw 11, 24(5)
; LE-32BIT-NEXT:    stw 3, 28(5)
; LE-32BIT-NEXT:    stw 10, 16(5)
; LE-32BIT-NEXT:    stw 9, 20(5)
; LE-32BIT-NEXT:    stw 8, 8(5)
; LE-32BIT-NEXT:    stw 7, 12(5)
; LE-32BIT-NEXT:    stw 6, 0(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 80
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %wordOff = load i256, ptr %wordOff.ptr, align 1
  %bitOff = shl i256 %wordOff, 5
  %res = ashr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; LE-64BIT-LABEL: ashr_32bytes_dwordOff:
; LE-64BIT:       # %bb.0:
; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
; LE-64BIT-NEXT:    ld 6, 16(3)
; LE-64BIT-NEXT:    ld 7, 24(3)
; LE-64BIT-NEXT:    lwz 3, 0(4)
; LE-64BIT-NEXT:    addi 4, 1, -64
; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 27, 28
; LE-64BIT-NEXT:    stxvd2x 0, 0, 4
; LE-64BIT-NEXT:    std 6, -48(1)
; LE-64BIT-NEXT:    sradi 6, 7, 63
; LE-64BIT-NEXT:    std 7, -40(1)
; LE-64BIT-NEXT:    std 6, -8(1)
; LE-64BIT-NEXT:    std 6, -16(1)
; LE-64BIT-NEXT:    std 6, -24(1)
; LE-64BIT-NEXT:    std 6, -32(1)
; LE-64BIT-NEXT:    lxvd2x 0, 4, 3
; LE-64BIT-NEXT:    add 3, 4, 3
; LE-64BIT-NEXT:    li 4, 16
; LE-64BIT-NEXT:    lxvd2x 1, 3, 4
; LE-64BIT-NEXT:    stxvd2x 1, 5, 4
; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
; LE-64BIT-NEXT:    blr
;
; BE-LABEL: ashr_32bytes_dwordOff:
; BE:       # %bb.0:
; BE-NEXT:    ld 7, 0(3)
; BE-NEXT:    ld 8, 8(3)
; BE-NEXT:    ld 9, 16(3)
; BE-NEXT:    ld 3, 24(3)
; BE-NEXT:    lwz 4, 28(4)
; BE-NEXT:    addi 6, 1, -32
; BE-NEXT:    std 3, -8(1)
; BE-NEXT:    sradi 3, 7, 63
; BE-NEXT:    rlwinm 4, 4, 3, 27, 28
; BE-NEXT:    std 3, -40(1)
; BE-NEXT:    std 3, -48(1)
; BE-NEXT:    std 3, -56(1)
; BE-NEXT:    std 3, -64(1)
; BE-NEXT:    neg 3, 4
; BE-NEXT:    std 9, -16(1)
; BE-NEXT:    std 8, -24(1)
; BE-NEXT:    std 7, -32(1)
; BE-NEXT:    extsw 3, 3
; BE-NEXT:    ldux 3, 6, 3
; BE-NEXT:    ld 4, 8(6)
; BE-NEXT:    ld 7, 24(6)
; BE-NEXT:    ld 6, 16(6)
; BE-NEXT:    std 3, 0(5)
; BE-NEXT:    std 6, 16(5)
; BE-NEXT:    std 7, 24(5)
; BE-NEXT:    std 4, 8(5)
; BE-NEXT:    blr
;
; LE-32BIT-LABEL: ashr_32bytes_dwordOff:
; LE-32BIT:       # %bb.0:
; LE-32BIT-NEXT:    stwu 1, -80(1)
; LE-32BIT-NEXT:    lwz 7, 0(3)
; LE-32BIT-NEXT:    addi 6, 1, 48
; LE-32BIT-NEXT:    lwz 8, 4(3)
; LE-32BIT-NEXT:    lwz 9, 8(3)
; LE-32BIT-NEXT:    lwz 10, 12(3)
; LE-32BIT-NEXT:    lwz 11, 16(3)
; LE-32BIT-NEXT:    lwz 12, 20(3)
; LE-32BIT-NEXT:    lwz 0, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    lwz 4, 28(4)
; LE-32BIT-NEXT:    stw 3, 76(1)
; LE-32BIT-NEXT:    srawi 3, 7, 31
; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
; LE-32BIT-NEXT:    stw 0, 72(1)
; LE-32BIT-NEXT:    stw 12, 68(1)
; LE-32BIT-NEXT:    stw 11, 64(1)
; LE-32BIT-NEXT:    stw 10, 60(1)
; LE-32BIT-NEXT:    stw 9, 56(1)
; LE-32BIT-NEXT:    stw 8, 52(1)
; LE-32BIT-NEXT:    stw 7, 48(1)
; LE-32BIT-NEXT:    stw 3, 44(1)
; LE-32BIT-NEXT:    stw 3, 40(1)
; LE-32BIT-NEXT:    stw 3, 36(1)
; LE-32BIT-NEXT:    stw 3, 32(1)
; LE-32BIT-NEXT:    stw 3, 28(1)
; LE-32BIT-NEXT:    stw 3, 24(1)
; LE-32BIT-NEXT:    stw 3, 20(1)
; LE-32BIT-NEXT:    stw 3, 16(1)
; LE-32BIT-NEXT:    sub 3, 6, 4
; LE-32BIT-NEXT:    lwz 4, 4(3)
; LE-32BIT-NEXT:    lwz 6, 0(3)
; LE-32BIT-NEXT:    lwz 7, 12(3)
; LE-32BIT-NEXT:    lwz 8, 8(3)
; LE-32BIT-NEXT:    lwz 9, 20(3)
; LE-32BIT-NEXT:    lwz 10, 16(3)
; LE-32BIT-NEXT:    lwz 11, 24(3)
; LE-32BIT-NEXT:    lwz 3, 28(3)
; LE-32BIT-NEXT:    stw 11, 24(5)
; LE-32BIT-NEXT:    stw 3, 28(5)
; LE-32BIT-NEXT:    stw 10, 16(5)
; LE-32BIT-NEXT:    stw 9, 20(5)
; LE-32BIT-NEXT:    stw 8, 8(5)
; LE-32BIT-NEXT:    stw 7, 12(5)
; LE-32BIT-NEXT:    stw 6, 0(5)
; LE-32BIT-NEXT:    stw 4, 4(5)
; LE-32BIT-NEXT:    addi 1, 1, 80
; LE-32BIT-NEXT:    blr
  %src = load i256, ptr %src.ptr, align 1
  %dwordOff = load i256, ptr %dwordOff.ptr, align 1
  %bitOff = shl i256 %dwordOff, 6
  %res = ashr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}


;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; LE: {{.*}}