llvm/llvm/test/CodeGen/AArch64/sve-reassocadd.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s
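; The functions below come in pairs that compute the same load address from a
; constant byte offset and a vscale-dependent offset (a whole number of SVE
; vector lengths), added in opposite orders. The CHECK lines record how each
; ordering is currently lowered: applying the vscale-sized step first yields a
; register-offset load, while applying the constant first lets the vscale-sized
; step fold into the [base, #imm, mul vl] addressing mode.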

define <vscale x 16 x i8> @i8_1v_4s(ptr %b) {
; CHECK-LABEL: i8_1v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
; CHECK-LABEL: i8_4s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @i16_1v_8s(ptr %b) {
; CHECK-LABEL: i16_1v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
; CHECK-LABEL: i16_8s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_2v_8s(ptr %b) {
; CHECK-LABEL: i16_2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
; CHECK-LABEL: i16_8s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @i32_1v_16s(ptr %b) {
; CHECK-LABEL: i32_1v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr = getelementptr inbounds i32, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_1v(ptr %b) {
; CHECK-LABEL: i32_16s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @i64_1v_32s(ptr %b) {
; CHECK-LABEL: i64_1v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr = getelementptr inbounds i64, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_1v(ptr %b) {
; CHECK-LABEL: i64_32s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr1 = getelementptr inbounds i64, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}


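; The remaining pairs repeat the same patterns with a negative vscale-dependent
; offset of two vector lengths (vscale * -32 bytes).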
define <vscale x 16 x i8> @i8_m2v_4s(ptr %b) {
; CHECK-LABEL: i8_m2v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

declare i64 @llvm.vscale.i64()