; llvm/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=dse -S | FileCheck %s
; Little-endian layout with 64-bit pointers. i64 has 64-bit ABI alignment and
; 128-bit vectors have 128-bit ABI alignment.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Two <4 x i32> vectors back to back.
%struct.vec2 = type { <4 x i32>, <4 x i32> }
; Two <4 x i32> vectors followed by a trailing i32 (field index 2).
%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }

; Zero-initialized globals used as memcpy sources by the tests below.
@glob1 = global %struct.vec2 zeroinitializer, align 16
@glob2 = global %struct.vec2plusi zeroinitializer, align 16

; The memset writes 28 bytes starting at element 1 of %p, i.e. bytes [4, 32).
; The following i32 store to element 7 rewrites bytes [28, 32), the last four
; bytes of the memset. The expected output has the memset length reduced from
; 28 to 24.
define void @write24to28(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write24to28(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Element-wise unordered-atomic counterpart of @write24to28: the memset uses
; 4-byte elements and the overwriting i32 store is `atomic ... unordered`.
; The expected output has the memset length reduced from 28 to 24.
define void @write24to28_atomic(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write24to28_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i32 4)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store atomic i32 1, ptr %arrayidx1 unordered, align 4
  ret void
}

; Atomicity of the store is weaker than that of the memset: the memset is
; element-wise unordered-atomic (4-byte elements) while the overwriting i32
; store is a plain, non-atomic store. The expected output still has the memset
; length reduced from 28 to 24.
define void @write24to28_atomic_weaker(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write24to28_atomic_weaker(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i32 4)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; The memset writes bytes [0, 32) of %p with a 4-byte-aligned destination. The
; following i32 store to element 7 rewrites bytes [28, 32). The expected
; output has the memset length reduced from 32 to 28.
define void @write28to32(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write28to32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 32, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Element-wise unordered-atomic counterpart of @write28to32 (4-byte elements,
; unordered atomic i32 store to element 7). The expected output has the memset
; length reduced from 32 to 28.
define void @write28to32_atomic(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write28to32_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 32, i32 4)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store atomic i32 1, ptr %arrayidx1 unordered, align 4
  ret void
}

; Negative test: same access pattern as @write28to32 (32-byte memset, then an
; i32 store to bytes [28, 32)), but the memset destination is 16-byte aligned.
; The expected output keeps the memset at its full 32-byte length.
; NOTE(review): presumably the shortened length is rounded up to the
; destination alignment, which leaves nothing to trim here - confirm against
; the DSE implementation.
define void @dontwrite28to32memset(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @dontwrite28to32memset(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 16 %p, i8 0, i64 32, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Negative test, element-wise unordered-atomic counterpart of
; @dontwrite28to32memset: the 16-byte-aligned, 32-byte memset (4-byte
; elements) is followed by an unordered atomic i32 store to bytes [28, 32).
; The expected output keeps the memset at its full 32-byte length.
; NOTE(review): presumably for the same alignment reason as the non-atomic
; variant - confirm against the DSE implementation.
define void @dontwrite28to32memset_atomic(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @dontwrite28to32memset_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 32, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %p, i8 0, i64 32, i32 4)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 7
  store atomic i32 1, ptr %arrayidx1 unordered, align 4
  ret void
}

; The memcpy copies 36 bytes from @glob2 into %p. The following i32 store
; targets field 2 of %struct.vec2plusi, which sits after the two <4 x i32>
; members (bytes [32, 36)), i.e. the tail of the copy. The expected output has
; the memcpy length reduced from 36 to 32.
define void @write32to36(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write32to36(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob2, i64 32, i1 false)
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], ptr [[P]], i64 0, i32 2
; CHECK-NEXT:    store i32 1, ptr [[C]], align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob2, i64 36, i1 false)
  %c = getelementptr inbounds %struct.vec2plusi, ptr %p, i64 0, i32 2
  store i32 1, ptr %c, align 4
  ret void
}

; Element-wise unordered-atomic counterpart of @write32to36: the memcpy uses
; 4-byte elements and the i32 store to field 2 is `atomic ... unordered`. The
; expected output has the memcpy length reduced from 36 to 32.
define void @write32to36_atomic(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write32to36_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob2, i64 32, i32 4)
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], ptr [[P]], i64 0, i32 2
; CHECK-NEXT:    store atomic i32 1, ptr [[C]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob2, i64 36, i32 4)
  %c = getelementptr inbounds %struct.vec2plusi, ptr %p, i64 0, i32 2
  store atomic i32 1, ptr %c unordered, align 4
  ret void
}

; Atomicity of the store is weaker than that of the memcpy: the memcpy is
; element-wise unordered-atomic (4-byte elements) while the i32 store to
; field 2 is a plain, non-atomic store. The expected output still has the
; memcpy length reduced from 36 to 32.
define void @write32to36_atomic_weaker(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write32to36_atomic_weaker(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob2, i64 32, i32 4)
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], ptr [[P]], i64 0, i32 2
; CHECK-NEXT:    store i32 1, ptr [[C]], align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob2, i64 36, i32 4)
  %c = getelementptr inbounds %struct.vec2plusi, ptr %p, i64 0, i32 2
  store i32 1, ptr %c, align 4
  ret void
}

; The memcpy copies 32 bytes from @glob1 into %p. The following <4 x i32>
; store targets field 1 of %struct.vec2 (the second vector, bytes [16, 32)),
; i.e. the whole second half of the copy. The expected output has the memcpy
; length reduced from 32 to 16.
define void @write16to32(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write16to32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 16, i1 false)
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 1
; CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[C]], align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob1, i64 32, i1 false)
  %c = getelementptr inbounds %struct.vec2, ptr %p, i64 0, i32 1
  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %c, align 4
  ret void
}

; Same access pattern as @write16to32, but the memcpy is element-wise
; unordered-atomic (4-byte elements). The overwriting <4 x i32> store to
; field 1 is a plain, non-atomic store. The expected output has the memcpy
; length reduced from 32 to 16.
define void @write16to32_atomic(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @write16to32_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 16, i32 4)
; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 1
; CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[C]], align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob1, i64 32, i32 4)
  %c = getelementptr inbounds %struct.vec2, ptr %p, i64 0, i32 1
  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %c, align 4
  ret void
}

; Negative test: a 16-byte-aligned, 32-byte memcpy from @glob1 is followed by
; an i32 store addressed with GEP indices (0, 0, 7), i.e. i32 element 7
; counted from the start of the first vector member. The expected output
; keeps the memcpy at its full 32-byte length.
; NOTE(review): the function name suggests the store covers bytes [28, 32);
; presumably the 16-byte destination alignment prevents trimming only 4 bytes
; - confirm against the DSE implementation.
define void @dontwrite28to32memcpy(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @dontwrite28to32memcpy(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 32, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob1, i64 32, i1 false)
  %arrayidx1 = getelementptr inbounds %struct.vec2, ptr %p, i64 0, i32 0, i64 7
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Negative test, element-wise unordered-atomic counterpart of
; @dontwrite28to32memcpy: the 16-byte-aligned, 32-byte memcpy (4-byte
; elements) is followed by an unordered atomic i32 store addressed with GEP
; indices (0, 0, 7). The expected output keeps the memcpy at its full 32-byte
; length.
; NOTE(review): presumably for the same alignment reason as the non-atomic
; variant - confirm against the DSE implementation.
define void @dontwrite28to32memcpy_atomic(ptr nocapture %p) nounwind uwtable ssp {
; CHECK-LABEL: @dontwrite28to32memcpy_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 32, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob1, i64 32, i32 4)
  %arrayidx1 = getelementptr inbounds %struct.vec2, ptr %p, i64 0, i32 0, i64 7
  store atomic i32 1, ptr %arrayidx1 unordered, align 4
  ret void
}

; Intrinsics exercised by the tests in this file: the plain memcpy/memset and
; their element-wise unordered-atomic variants (last operand is the element
; size in bytes for the atomic forms, the volatile flag for the plain forms).
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i32) nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture, i8, i64, i32) nounwind

; Record of three i64 fields (24 bytes) used by @cpu_lwp_fork.
%struct.trapframe = type { i64, i64, i64 }

; bugzilla 11455 - make sure negative GEPs don't break this optimisation.
; The memcpy writes one 24-byte %struct.trapframe at index -1 relative to the
; pointer built from %pcb_rsp0. The i64 store then hits field 1 of that same
; frame, i.e. the middle 8 bytes of the copied region rather than its end. The
; expected output leaves the memcpy length at 24.
define void @cpu_lwp_fork(ptr %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
; CHECK-LABEL: @cpu_lwp_fork(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[PCB_RSP0:%.*]] to ptr
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds [[STRUCT_TRAPFRAME:%.*]], ptr [[TMP0]], i64 -1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[ADD_PTR]], ptr [[MD_REGS:%.*]], i64 24, i1 false)
; CHECK-NEXT:    [[TF_TRAPNO:%.*]] = getelementptr inbounds [[STRUCT_TRAPFRAME]], ptr [[TMP0]], i64 -1, i32 1
; CHECK-NEXT:    store i64 3, ptr [[TF_TRAPNO]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = inttoptr i64 %pcb_rsp0 to ptr
  %add.ptr = getelementptr inbounds %struct.trapframe, ptr %0, i64 -1
  call void @llvm.memcpy.p0.p0.i64(ptr %add.ptr, ptr %md_regs, i64 24, i1 false)
  %tf_trapno = getelementptr inbounds %struct.trapframe, ptr %0, i64 -1, i32 1
  store i64 3, ptr %tf_trapno, align 8
  ret void
}

; The 32-byte, 8-byte-aligned memset is followed by two i64 stores to elements
; 2 and 3 of %P, which together rewrite bytes [16, 32). The expected output has
; the memset length reduced from 32 to 16.
; The stores spell out `align 8` (the ABI alignment of i64 under this file's
; datalayout, so the IR semantics are unchanged) and the assertions pin it,
; matching the _atomic variants of this test. Without the suffix the store
; assertions would also accept output with a different alignment.
; The %n64/%n32/%n16/%n8 parameters are not referenced by the body.
define void @write16To23AndThen24To31(ptr nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
; CHECK-LABEL: @write16To23AndThen24To31(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
; CHECK-NEXT:    store i64 3, ptr [[BASE64_2]], align 8
; CHECK-NEXT:    store i64 3, ptr [[BASE64_3]], align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.p0.i64(ptr align 8 %P, i8 0, i64 32, i1 false)

  %base64_2 = getelementptr inbounds i64, ptr %P, i64 2
  %base64_3 = getelementptr inbounds i64, ptr %P, i64 3

  store i64 3, ptr %base64_2, align 8
  store i64 3, ptr %base64_3, align 8
  ret void
}

; Element-wise unordered-atomic counterpart of @write16To23AndThen24To31: the
; 32-byte memset uses 8-byte elements and both overwriting i64 stores (to
; elements 2 and 3, bytes [16, 32)) are `atomic ... unordered`. The expected
; output has the memset length reduced from 32 to 16.
; The %n64/%n32/%n16/%n8 parameters are not referenced by the body.
define void @write16To23AndThen24To31_atomic(ptr nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
; CHECK-LABEL: @write16To23AndThen24To31_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
; CHECK-NEXT:    store atomic i64 3, ptr [[BASE64_2]] unordered, align 8
; CHECK-NEXT:    store atomic i64 3, ptr [[BASE64_3]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_2 = getelementptr inbounds i64, ptr %P, i64 2
  %base64_3 = getelementptr inbounds i64, ptr %P, i64 3

  store atomic i64 3, ptr %base64_2 unordered, align 8
  store atomic i64 3, ptr %base64_3 unordered, align 8
  ret void
}

; Variant of @write16To23AndThen24To31_atomic where the FIRST overwriting
; store (element 2, bytes [16, 24)) is a plain, non-atomic store and the second
; (element 3, bytes [24, 32)) is unordered atomic. The expected output still
; has the element-wise atomic memset length reduced from 32 to 16.
; The %n64/%n32/%n16/%n8 parameters are not referenced by the body.
define void @write16To23AndThen24To31_atomic_weaker1(ptr nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
; CHECK-NEXT:    store i64 3, ptr [[BASE64_2]], align 8
; CHECK-NEXT:    store atomic i64 3, ptr [[BASE64_3]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_2 = getelementptr inbounds i64, ptr %P, i64 2
  %base64_3 = getelementptr inbounds i64, ptr %P, i64 3

  store i64 3, ptr %base64_2, align 8
  store atomic i64 3, ptr %base64_3 unordered, align 8
  ret void
}

; Variant of @write16To23AndThen24To31_atomic where the SECOND overwriting
; store (element 3, bytes [24, 32)) is a plain, non-atomic store and the first
; (element 2, bytes [16, 24)) is unordered atomic. The expected output still
; has the element-wise atomic memset length reduced from 32 to 16.
; The %n64/%n32/%n16/%n8 parameters are not referenced by the body.
define void @write16To23AndThen24To31_atomic_weaker2(ptr nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
; CHECK-NEXT:    store atomic i64 3, ptr [[BASE64_2]] unordered, align 8
; CHECK-NEXT:    store i64 3, ptr [[BASE64_3]], align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_2 = getelementptr inbounds i64, ptr %P, i64 2
  %base64_3 = getelementptr inbounds i64, ptr %P, i64 3

  store atomic i64 3, ptr %base64_2 unordered, align 8
  store i64 3, ptr %base64_3, align 8
  ret void
}

; The memset writes 32 bytes starting at %p + 1 with a 1-byte-aligned
; destination. The following i64 store starts 27 bytes into that region, so
; it rewrites the last 5 bytes of the memset (and runs 3 bytes past its end).
; The expected output has the memset length reduced from 32 to 27.
define void @ow_end_align1(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 27, i1 false)
; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 1 %p1, i8 0, i64 32, i1 false)
  %p2 = getelementptr inbounds i8, ptr %p1, i64 27
  store i64 1, ptr %p2, align 1
  ret void
}

; Same access pattern as @ow_end_align1 (32-byte memset at %p + 1, i64 store
; starting 27 bytes into it), but the memset destination is declared 4-byte
; aligned. The expected output has the memset length reduced from 32 to 28
; rather than 27.
; NOTE(review): presumably the shortened length is rounded up to a multiple of
; the destination alignment - confirm against the DSE implementation.
define void @ow_end_align4(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 28, i1 false)
; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %p1, i8 0, i64 32, i1 false)
  %p2 = getelementptr inbounds i8, ptr %p1, i64 27
  store i64 1, ptr %p2, align 1
  ret void
}

; Same access pattern as @ow_end_align1 (32-byte memset at %p + 1, i64 store
; starting 27 bytes into it), but the memset destination is declared 8-byte
; aligned. The expected output keeps the memset at its full 32-byte length.
; NOTE(review): presumably rounding the shortened length (27) up to a multiple
; of the 8-byte alignment gives 32 again, so nothing is trimmed - confirm
; against the DSE implementation.
define void @ow_end_align8(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[P1]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 8 %p1, i8 0, i64 32, i1 false)
  %p2 = getelementptr inbounds i8, ptr %p1, i64 27
  store i64 1, ptr %p2, align 1
  ret void
}