; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL,BE
; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL,LE
declare void @use(i16)
declare void @use_vec(<8 x i16>)
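; Low half in lane 0, high half in lane 1 matches little-endian lane order, so
; only LE folds this to a wide insert + bitcast.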
define <4 x i16> @insert_01_poison_v4i16(i32 %x) {
; BE-LABEL: @insert_01_poison_v4i16(
; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; BE-NEXT: ret <4 x i16> [[INS1]]
;
; LE-LABEL: @insert_01_poison_v4i16(
; LE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
; LE-NEXT: [[INS1:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; LE-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}
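; High half in lane 0, low half in lane 1 matches big-endian lane order, so
; only BE folds.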
define <8 x i16> @insert_10_poison_v8i16(i32 %x) {
; BE-LABEL: @insert_10_poison_v8i16(
; BE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; BE-NEXT: [[INS1:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x i16>
; BE-NEXT: ret <8 x i16> [[INS1]]
;
; LE-LABEL: @insert_10_poison_v8i16(
; LE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; LE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; LE-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[HI16]], i64 0
; LE-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1
; LE-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <8 x i16> poison, i16 %lo16, i64 1
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
ret <8 x i16> %ins1
}
; negative test - larger element is not aligned in the vector
define <4 x i32> @insert_12_poison_v4i32(i64 %x) {
; ALL-LABEL: @insert_12_poison_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 1
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
ret <4 x i32> %ins1
}
; negative test - larger element is not aligned in the vector
define <4 x i16> @insert_21_poison_v4i16(i32 %x) {
; ALL-LABEL: @insert_21_poison_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 2
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}
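; Lanes 2 and 3 exactly cover the second i64 element, so LE still folds
; (insert at element 1 of <2 x i64>).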
define <4 x i32> @insert_23_poison_v4i32(i64 %x) {
; BE-LABEL: @insert_23_poison_v4i32(
; BE-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; BE-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32
; BE-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 2
; BE-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; BE-NEXT: ret <4 x i32> [[INS1]]
;
; LE-LABEL: @insert_23_poison_v4i32(
; LE-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 1
; LE-NEXT: [[INS1:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
; LE-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 2
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
ret <4 x i32> %ins1
}
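; High half in lane 2, low half in lane 3 matches big-endian order over the
; second i32 element, so only BE folds.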
define <4 x i16> @insert_32_poison_v4i16(i32 %x) {
; BE-LABEL: @insert_32_poison_v4i16(
; BE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 1
; BE-NEXT: [[INS1:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; BE-NEXT: ret <4 x i16> [[INS1]]
;
; LE-LABEL: @insert_32_poison_v4i16(
; LE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; LE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; LE-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 2
; LE-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3
; LE-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 3
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
ret <4 x i16> %ins1
}
; Similar to the above tests but with a non-poison base vector.
; Vector is same size as scalar, so this is just a cast.
; TODO: Could be swapped/rotated into place.
define <2 x i16> @insert_01_v2i16(i32 %x, <2 x i16> %v) {
; BE-LABEL: @insert_01_v2i16(
; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT: [[INS0:%.*]] = insertelement <2 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT: [[INS1:%.*]] = insertelement <2 x i16> [[INS0]], i16 [[HI16]], i64 1
; BE-NEXT: ret <2 x i16> [[INS1]]
;
; LE-LABEL: @insert_01_v2i16(
; LE-NEXT: [[INS1:%.*]] = bitcast i32 [[X:%.*]] to <2 x i16>
; LE-NEXT: ret <2 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <2 x i16> %v, i16 %lo16, i64 0
%ins1 = insertelement <2 x i16> %ins0, i16 %hi16, i64 1
ret <2 x i16> %ins1
}
; negative test - can't do this safely without knowing something about the base vector
define <8 x i16> @insert_10_v8i16(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_10_v8i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 1
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
ret <8 x i16> %ins1
}
; negative test - larger element is not aligned in the vector
define <4 x i32> @insert_12_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_12_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 1
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
ret <4 x i32> %ins1
}
; negative test - larger element is not aligned in the vector
define <4 x i16> @insert_21_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_21_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 1
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 2
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}
; negative test - can't do this safely without knowing something about the base vector
define <4 x i32> @insert_23_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_23_v4i32(
; ALL-NEXT: [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT: [[HI32:%.*]] = trunc nuw i64 [[HI64]] to i32
; ALL-NEXT: [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; ALL-NEXT: ret <4 x i32> [[INS1]]
;
%hi64 = lshr i64 %x, 32
%hi32 = trunc i64 %hi64 to i32
%lo32 = trunc i64 %x to i32
%ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 2
%ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
ret <4 x i32> %ins1
}
; negative test - can't do this safely without knowing something about the base vector
define <4 x i16> @insert_32_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_32_v4i16(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 2
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 3
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
ret <4 x i16> %ins1
}
; negative test - need half-width shift
define <4 x i16> @insert_01_v4i16_wrong_shift1(i32 %x) {
; ALL-LABEL: @insert_01_v4i16_wrong_shift1(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 8
; ALL-NEXT: [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 8
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}
; negative test - need common scalar
define <4 x i16> @insert_01_v4i16_wrong_op(i32 %x, i32 %y) {
; ALL-LABEL: @insert_01_v4i16_wrong_op(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[Y:%.*]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT: [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT: ret <4 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %y to i16
%ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
%ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
ret <4 x i16> %ins1
}
; TODO: extra use doesn't have to prevent the fold.
define <8 x i16> @insert_67_v8i16_uses1(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_67_v8i16_uses1(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: call void @use(i16 [[HI16]])
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 7
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
call void @use(i16 %hi16)
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 6
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 7
ret <8 x i16> %ins1
}
; negative test - can't do this safely without knowing something about the base vector
; extra use would be ok
define <8 x i16> @insert_76_v8i16_uses2(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_76_v8i16_uses2(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: call void @use(i16 [[LO16]])
; ALL-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 6
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 7
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
call void @use(i16 %lo16)
%ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 7
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 6
ret <8 x i16> %ins1
}
; TODO: extra use doesn't have to prevent the fold.
define <8 x i16> @insert_67_v8i16_uses3(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_67_v8i16_uses3(
; ALL-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; ALL-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT: [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6
; ALL-NEXT: call void @use_vec(<8 x i16> [[INS0]])
; ALL-NEXT: [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 7
; ALL-NEXT: ret <8 x i16> [[INS1]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 6
call void @use_vec(<8 x i16> %ins0)
%ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 7
ret <8 x i16> %ins1
}
; Same fold as the 1st test - the inserts are just in the opposite order.
define <4 x i16> @insert_01_poison_v4i16_high_first(i32 %x) {
; BE-LABEL: @insert_01_poison_v4i16_high_first(
; BE-NEXT: [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT: [[HI16:%.*]] = trunc nuw i32 [[HI32]] to i16
; BE-NEXT: [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT: [[INS0:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[HI16]], i64 1
; BE-NEXT: ret <4 x i16> [[INS0]]
;
; LE-LABEL: @insert_01_poison_v4i16_high_first(
; LE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
; LE-NEXT: [[INS0:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; LE-NEXT: ret <4 x i16> [[INS0]]
;
%hi32 = lshr i32 %x, 16
%hi16 = trunc i32 %hi32 to i16
%lo16 = trunc i32 %x to i16
%ins1 = insertelement <4 x i16> poison, i16 %hi16, i64 1
%ins0 = insertelement <4 x i16> %ins1, i16 %lo16, i64 0
ret <4 x i16> %ins0
}