llvm/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool opt --version 4
;
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-vshift.ll

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"

define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @uqshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @uqshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @uqshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @uqshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @uqshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @uqshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @uqshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @uqshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @uqshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @uqshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone


declare <8 x i8>  @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @srshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @srshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @srshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @srshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @srshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @srshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @srshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @urshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @urshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @urshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @urshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @urshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @urshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @urshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @srshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @srshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @srshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @srshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @urshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @urshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @urshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @urshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqrshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqrshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqrshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqrshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @uqrshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @uqrshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqrshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqrshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqrshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqrshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqrshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqrshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqrshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqrshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @uqrshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @uqrshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @uqrshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @uqrshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @uqrshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @uqrshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @uqrshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @uqrshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @urshr8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @urshr4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @urshr2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @urshr16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @urshr8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @urshr4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @urshr2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %tmp3
}

define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @urshr1d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
  ret <1 x i64> %tmp3
}

define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @urshr_scalar(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 -1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
  ret i64 %tmp3
}

define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @srshr8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @srshr4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @srshr2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @srshr16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @srshr8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @srshr4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @srshr2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  ret <2 x i64> %tmp3
}

define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @srshr1d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
  ret <1 x i64> %tmp3
}

define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @srshr_scalar(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 -1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
  ret i64 %tmp3
}

define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqshlu8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqshlu4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqshlu2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqshlu16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqshlu8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqshlu4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqshlu2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqshlu1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqshlu_i64_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqshlu_i32_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[_MSLD]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[TMP1]], i32 1)
; CHECK-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %tmp1 = load i32, ptr %A
  %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
  ret i32 %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @rshrn8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @rshrn4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @rshrn2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @rshrn16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @rshrn8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @rshrn4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone

define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @shrn8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
  ret <8 x i8> %tmp3
}

define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @shrn4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
  ret <4 x i16> %tmp3
}

define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @shrn2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
  ret <2 x i32> %tmp3
}

define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @shrn16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @shrn8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @shrn4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare <8 x i8>  @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @sqshrn1s(i64 %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqshrn1s(
; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[TMP1]], i32 1)
; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[A]], i32 1)
; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP]]
;
  %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqshrn8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqshrn4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqshrn2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}


define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqshrn16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqshrn8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqshrn4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @sqshrun1s(i64 %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqshrun1s(
; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[TMP1]], i32 1)
; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[A]], i32 1)
; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP]]
;
  %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqshrun8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqshrun4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqshrun2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqshrun16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqshrun8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqshrun4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @sqrshrn1s(i64 %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqrshrn1s(
; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[TMP1]], i32 1)
; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 1)
; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP]]
;
  %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqrshrn8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqrshrn4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqrshrn2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqrshrn16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqrshrn8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqrshrn4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @sqrshrun1s(i64 %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqrshrun1s(
; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[TMP1]], i32 1)
; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 1)
; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP]]
;
  %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqrshrun8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqrshrun4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqrshrun2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqrshrun16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqrshrun8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqrshrun4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @uqrshrn1s(i64 %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @uqrshrn1s(
; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[TMP1]], i32 1)
; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 1)
; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP]]
;
  %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqrshrn8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @uqrshrn4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @uqrshrn2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @uqrshrn16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @uqrshrn8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @uqrshrn4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone

define i32 @uqshrn1s(i64 %A) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @uqshrn1s(
; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[TMP1]], i32 1)
; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[A]], i32 1)
; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[TMP]]
;
  %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqshrn8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @uqshrn4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @uqshrn2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @uqshrn16b(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
;
  %out = load <8 x i8>, ptr %ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @uqshrn8h(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
;
  %out = load <4 x i16>, ptr %ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @uqshrn4s(
; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %out = load <2 x i32>, ptr %ret
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32  @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8>  @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone

define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @ushll8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @ushll4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @ushll2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @ushll2_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %load1 = load <16 x i8>, ptr %A
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @ushll2_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %load1 = load <8 x i16>, ptr %A
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @ushll2_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %load1 = load <4 x i32>, ptr %A
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)

define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @neon.ushll8h_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @neon.ushl8h_no_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i128 [[TMP7]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i128
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i128 [[TMP10]] to <8 x i16>
; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP12]], [[TMP11]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i8>, ptr %A
  %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @neon.ushl8_noext_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

; FIXME: unnecessary ushll.4s v0, v0, #0?
define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.ushll4s_neg_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

; FIXME: should be constant folded.
define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @neon.ushll2d_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @neon.ushl_vscalar_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i32>, ptr %A
  %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @neon.ushl_scalar_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[_MSPROP]], i64 1)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[TMP2]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i32, ptr %A
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
  ret i64 %tmp3
}

define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sshll8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sshll2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)

define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @neon.sshl16b_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp2
}

define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @neon.sshl16b_non_splat_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp2
}

define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @neon.sshl16b_neg_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
  ret <16 x i8> %tmp2
}

define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @neon.sshll8h_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i8>, ptr %A
  %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.sshll4s_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.sshll4s_neg_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  ret <4 x i32> %tmp3
}

; FIXME: should be constant folded.
define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
  ret <4 x i32> %tmp3
}

define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @neon.sshl4s_no_fold(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @neon.sshll2d_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @neon.sshll_vscalar_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> <i64 1>)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 1>)
; CHECK-NEXT:    store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i32>, ptr %A
  %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[_MSPROP]], i64 1)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i32, ptr %A
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
  ret i64 %tmp3
}

define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift_m1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[_MSPROP]], i64 -1)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 -1)
; CHECK-NEXT:    store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i32, ptr %A
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
  ret i64 %tmp3
}

; FIXME: should be constant folded.
define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @neon.sshl2d_no_fold(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 2, i64 2>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 2, i64 2>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp2 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
  ret <2 x i64> %tmp3
}

define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sshll2_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %load1 = load <16 x i8>, ptr %A
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sshll2_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %load1 = load <8 x i16>, ptr %A
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sshll2_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %load1 = load <4 x i32>, ptr %A
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqshli8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqshli4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqshli2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqshli16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqshli8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqshli4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqshli2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqshli8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqshli8b_1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @uqshli4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @uqshli2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @uqshli16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @uqshli8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @uqshli4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @uqshli2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @ursra8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <8 x i8>, ptr %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @ursra4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <4 x i16>, ptr %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @ursra2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  %tmp4 = load <2 x i32>, ptr %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @ursra16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <16 x i8>, ptr %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @ursra8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <8 x i16>, ptr %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @ursra4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  %tmp4 = load <4 x i32>, ptr %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @ursra2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  %tmp4 = load <2 x i64>, ptr %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @ursra1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP5]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
  %tmp4 = load <1 x i64>, ptr %B
  %tmp5 = add <1 x i64> %tmp3, %tmp4
  ret <1 x i64> %tmp5
}

define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @ursra_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 -1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP8]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP5]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
  %tmp4 = load i64, ptr %B
  %tmp5 = add i64 %tmp3, %tmp4
  ret i64 %tmp5
}

define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @srsra8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <8 x i8>, ptr %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @srsra4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <4 x i16>, ptr %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @srsra2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  %tmp4 = load <2 x i32>, ptr %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @srsra16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <16 x i8>, ptr %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @srsra8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <8 x i16>, ptr %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @srsra4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  %tmp4 = load <4 x i32>, ptr %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @srsra2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  %tmp4 = load <2 x i64>, ptr %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @srsra1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP5]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
  %tmp4 = load <1 x i64>, ptr %B
  %tmp5 = add <1 x i64> %tmp3, %tmp4
  ret <1 x i64> %tmp5
}

define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @srsra_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 -1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP8]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP5]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
  %tmp4 = load i64, ptr %B
  %tmp5 = add i64 %tmp3, %tmp4
  ret i64 %tmp5
}

define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @usra8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <8 x i8>, ptr %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @usra4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <4 x i16>, ptr %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @usra2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], <i32 1, i32 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp4 = load <2 x i32>, ptr %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @usra16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <16 x i8>, ptr %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @usra8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <8 x i16>, ptr %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @usra4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp4 = load <4 x i32>, ptr %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @usra2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp4 = load <2 x i64>, ptr %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @usra1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], <i64 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP5]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
  %tmp4 = load <1 x i64>, ptr %B
  %tmp5 = add <1 x i64> %tmp3, %tmp4
  ret <1 x i64> %tmp5
}

define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @ssra8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <8 x i8>, ptr %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @ssra4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <4 x i16>, ptr %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @ssra2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], <i32 1, i32 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp4 = load <2 x i32>, ptr %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @ssra16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <16 x i8>, ptr %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @ssra8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <8 x i16>, ptr %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @ssra4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp4 = load <4 x i32>, ptr %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @ssra2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp4 = load <2 x i64>, ptr %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @shr_orr8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp4 = load <8 x i8>, ptr %B
  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @shr_orr4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp4 = load <4 x i16>, ptr %B
  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @shr_orr2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], <i32 1, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp4 = load <2 x i32>, ptr %B
  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp5 = or <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @shr_orr16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp4 = load <16 x i8>, ptr %B
  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @shr_orr8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp4 = load <8 x i16>, ptr %B
  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @shr_orr4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp4 = load <4 x i32>, ptr %B
  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp5 = or <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @shr_orr2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], <i64 -1, i64 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp4 = load <2 x i64>, ptr %B
  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp5 = or <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @shl_orr8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp4 = load <8 x i8>, ptr %B
  %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @shl_orr4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp4 = load <4 x i16>, ptr %B
  %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @shl_orr2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], <i32 1, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp4 = load <2 x i32>, ptr %B
  %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp5 = or <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @shl_orr16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp4 = load <16 x i8>, ptr %B
  %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp5 = or <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @shl_orr8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp4 = load <8 x i16>, ptr %B
  %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp5 = or <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @shl_orr4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp4 = load <4 x i32>, ptr %B
  %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp5 = or <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @shl_orr2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], <i64 -1, i64 -1>
; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]]
; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp4 = load <2 x i64>, ptr %B
  %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp5 = or <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <8 x i16> @shll(<8 x i8> %in) sanitize_memory {
; CHECK-LABEL: define <8 x i16> @shll(
; CHECK-SAME: <8 x i8> [[IN:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = shl <8 x i16> [[EXT]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[RES]]
;
  %ext = zext <8 x i8> %in to <8 x i16>
  %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %res
}

define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @shll_high(
; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
; CHECK-NEXT:    [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = shl <4 x i32> [[EXT]], <i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %ext = zext <4 x i16> %extract to <4 x i32>
  %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %res
}

define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sli8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1)
; CHECK-NEXT:    store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sli4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]], i32 1)
; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sli2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 1)
; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
  ret <2 x i32> %tmp3
}

define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sli1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD]] to i64
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]], i32 1)
; CHECK-NEXT:    store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
  ret <1 x i64> %tmp3
}

define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sli16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1)
; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sli8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 1)
; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sli4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sli2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
; CHECK:       15:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       16:
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 1)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
  ret <2 x i64> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone

define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) sanitize_memory {
; CHECK-LABEL: define <1 x i64> @ashr_v1i64(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = ashr <1 x i64> [[TMP1]], [[B]]
; CHECK-NEXT:    [[TMP6:%.*]] = or <1 x i64> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[C:%.*]] = ashr <1 x i64> [[A]], [[B]]
; CHECK-NEXT:    store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[C]]
;
  %c = ashr <1 x i64> %a, %b
  ret <1 x i64> %c
}

define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @sqshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @uqshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @srshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @urshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @sqshlu_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @sshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @ushl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sext_rshrn(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP0]], i32 13)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13)
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; CHECK-NEXT:    [[VMOVL_I:%.*]] = sext <4 x i16> [[VRSHRN_N1]] to <4 x i32>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[VMOVL_I]]
;
entry:
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
  ret <4 x i32> %vmovl.i
}

define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @zext_rshrn(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP0]], i32 13)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13)
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; CHECK-NEXT:    [[VMOVL_I:%.*]] = zext <4 x i16> [[VRSHRN_N1]] to <4 x i32>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[VMOVL_I]]
;
entry:
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
  ret <4 x i32> %vmovl.i
}

define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory {
; CHECK-LABEL: define <4 x i16> @mul_rshrn(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
; CHECK-NEXT:    [[B:%.*]] = add <4 x i32> [[A]], <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSPROP]], i32 13)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13)
; CHECK-NEXT:    store <4 x i16> [[TMP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[VRSHRN_N1]]
;
entry:
  %b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
  ret <4 x i16> %vrshrn_n1
}

declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575}
;.