; llvm/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-threshold=-999 -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck %s

; External callee declared nounwind and readonly but not willreturn.  It is
; called by @allocas_speculation as a call that potentially does not return.
declare i64 @may_inf_loop_ro() nounwind readonly

; Base case without allocas or stacksave.  Two adjacent pointer loads from %a
; feed two i8 GEPs whose results are stored to adjacent slots of %b.  The
; expected output uses a single <2 x ptr> load, GEP and store, with the vector
; load placed ahead of the scalar store of null to %a.
define void @basecase(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @basecase(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    store ptr null, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; The store to %a[0] sits between the loads of %a[0] and %a[1].
  %v1 = load ptr, ptr %a
  store ptr zeroinitializer, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  ; Same GEP shape on both lanes.
  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Using two allocas and a buildvector.  The two alloca results are the lane
; inputs, so the expected output gathers them with insertelement before the
; vector GEP.  %add1 is also stored to %a as a scalar, so a scalar GEP of %v1
; remains in the expected output next to the vector one.
define void @allocas(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @allocas(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; The second alloca comes after the scalar store of %add1 to %a.
  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a
  %v2 = alloca i8

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Allocas cannot be speculated above a potentially non-returning call.
; Same shape as @allocas, but with a call to @may_inf_loop_ro between the two
; allocas.  The expected output keeps the scalar instruction sequence, with the
; second alloca still after the call.
define void @allocas_speculation(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @allocas_speculation(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @may_inf_loop_ro()
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a
  ; The call separates the two allocas; its result is unused.
  call i64 @may_inf_loop_ro()
  %v2 = alloca i8

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; We must be careful not to lift the inalloca alloc above the stacksave here.
; We used to miscompile this example before adding explicit dependency handling
; for stacksave.
; The expected output keeps the scalar instruction sequence, with the inalloca
; alloca still between the stacksave and stackrestore calls.
define void @stacksave(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[ADD1:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[ADD2:%.*]] = getelementptr i8, ptr [[V2]], i32 1
; CHECK-NEXT:    store ptr [[ADD1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[B2:%.*]] = getelementptr ptr, ptr [[B]], i32 1
; CHECK-NEXT:    store ptr [[ADD2]], ptr [[B2]], align 8
; CHECK-NEXT:    ret void
;

  ; First lane: plain alloca, with its GEP also stored to %a before the
  ; stacksave.
  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1
  store ptr %add1, ptr %a

  ; Second lane: inalloca alloca bracketed by stacksave/stackrestore and passed
  ; to a readnone call.
  %stack = call ptr @llvm.stacksave()
  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readnone
  call void @llvm.stackrestore(ptr %stack)

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Variant of @stacksave in which the scalar store of %add1 to %a comes after the
; stacksave call and the call to @use is readonly instead of readnone.  The
; expected output vectorizes the two GEPs from a buildvector of %v1 and %v2;
; the inalloca alloca remains after the stacksave call, and the vector store
; follows the stackrestore call.
define void @stacksave2(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave2(
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  %v1 = alloca i8
  %add1 = getelementptr i8, ptr %v1, i32 1

  ; The store to %a is inside the stacksave/stackrestore region here.
  %stack = call ptr @llvm.stacksave()
  store ptr %add1, ptr %a
  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readonly
  call void @llvm.stackrestore(ptr %stack)

  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Both allocas (one plain, one inalloca) are created after the stacksave call,
; and both GEPs come after the stackrestore call.  The expected output leaves
; the allocas between stacksave and stackrestore and emits the buildvector,
; vector GEP and vector store after the stackrestore call.
define void @stacksave3(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave3(
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; The stacksave call precedes both allocas.
  %stack = call ptr @llvm.stacksave()
  %v1 = alloca i8

  %v2 = alloca inalloca i8
  call void @use(ptr inalloca(i8) %v2) readnone
  call void @llvm.stackrestore(ptr %stack)

  ; Same GEP shape on both lanes, after the stackrestore call.
  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Here we have an alloca which needs to stay under the stacksave, but is not
; directly part of the vectorization tree.  Instead, the stacksave is
; encountered during dependency scanning via the memory chain.
; The expected output places the vector load and GEP before the stacksave call
; and the vector store after the stackrestore call; the stacksave / alloca /
; @use / stackrestore sequence keeps its order.
define void @stacksave4(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave4(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; Adjacent loads from %a[0] and %a[1].
  %v1 = load ptr, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; %x is not used by any of the loads, GEPs or stores around this region.
  %stack = call ptr @llvm.stacksave()
  %x = alloca inalloca i8
  call void @use(ptr inalloca(i8) %x) readnone
  call void @llvm.stackrestore(ptr %stack)

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; As written here, the body of this function is the same as @stacksave4: two
; adjacent loads and GEPs, a stacksave / inalloca alloca / @use / stackrestore
; region that does not feed the stored values, then two adjacent stores.  The
; same vectorized output is expected.
define void @stacksave5(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stacksave5(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; Adjacent loads from %a[0] and %a[1].
  %v1 = load ptr, ptr %a
  %a2 = getelementptr ptr, ptr %a, i32 1
  %v2 = load ptr, ptr %a2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; %x is not used by any of the loads, GEPs or stores around this region.
  %stack = call ptr @llvm.stacksave()
  %x = alloca inalloca i8
  call void @use(ptr inalloca(i8) %x) readnone
  call void @llvm.stackrestore(ptr %stack)

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Reordering the second alloca above the stackrestore while
; leaving the write to it below would introduce a write-after-free
; bug.
; The expected output keeps the second alloca and its store below the
; stackrestore call, and emits the buildvector, vector GEP and vector store
; after them.
define void @stackrestore1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @stackrestore1(
; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[V1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    store i8 0, ptr [[V1]], align 1
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    store i8 0, ptr [[V2]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;

  ; First alloca and its store are inside the stacksave/stackrestore region.
  %stack = call ptr @llvm.stacksave()
  %v1 = alloca i8
  store i8 0, ptr %v1
  call void @llvm.stackrestore(ptr %stack)
  ; Second alloca and its store come after the stackrestore call.
  %v2 = alloca i8
  store i8 0, ptr %v2

  %add1 = getelementptr i8, ptr %v1, i32 1
  %add2 = getelementptr i8, ptr %v2, i32 1

  ; Adjacent stores to %b[0] and %b[1].
  store ptr %add1, ptr %b
  %b2 = getelementptr ptr, ptr %b, i32 1
  store ptr %add2, ptr %b2
  ret void
}

; Declarations shared by the tests above: @use consumes an inalloca(i8)
; argument, and the stacksave/stackrestore intrinsics bracket the inalloca
; allocations.  The intrinsics are declared here without a type suffix; the
; expected output refers to them as @llvm.stacksave.p0 / @llvm.stackrestore.p0.
declare void @use(ptr inalloca(i8))
declare ptr @llvm.stacksave()
declare void @llvm.stackrestore(ptr)

; The next set of tests is reduced from previous regressions.

; External callees used by @ham: @wibble takes and returns a pointer, and
; @quux takes an inalloca(i32) argument.
declare ptr @wibble(ptr)
declare void @quux(ptr inalloca(i32))

; Eight pointer stores to consecutive slots 0-7 of %var12, placed after a
; stacksave / inalloca alloca / @quux / stackrestore region.  Slots 0-3 all
; receive %var4; slots 4-7 receive %var4, %var5, %var5, %var5.  The expected
; output turns them into two <4 x ptr> stores, both after the stackrestore
; call.
define void @ham() #1 {
; CHECK-LABEL: @ham(
; CHECK-NEXT:    [[VAR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR3:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
; CHECK-NEXT:    [[VAR15:%.*]] = call ptr @wibble(ptr [[VAR2]])
; CHECK-NEXT:    [[VAR16:%.*]] = call ptr @wibble(ptr [[VAR3]])
; CHECK-NEXT:    [[VAR17:%.*]] = call ptr @wibble(ptr [[VAR4]])
; CHECK-NEXT:    [[VAR23:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    [[VAR24:%.*]] = alloca inalloca i32, align 4
; CHECK-NEXT:    call void @quux(ptr inalloca(i32) [[VAR24]])
; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[VAR23]])
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    store <4 x ptr> [[TMP2]], ptr [[VAR12]], align 8
; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x ptr> [[TMP4]], ptr [[VAR36]], align 8
; CHECK-NEXT:    ret void
;
  ; Stack objects; %var12 is the destination array for the stores below.
  %var2 = alloca i8
  %var3 = alloca i8
  %var4 = alloca i8
  %var5 = alloca i8
  %var12 = alloca [12 x ptr]
  ; Calls whose results are unused; %var2, %var3 and %var4 are passed to them.
  %var15 = call ptr @wibble(ptr %var2)
  %var16 = call ptr @wibble(ptr %var3)
  %var17 = call ptr @wibble(ptr %var4)
  ; inalloca argument region bracketed by stacksave/stackrestore.
  %var23 = call ptr @llvm.stacksave()
  %var24 = alloca inalloca i32
  call void @quux(ptr inalloca(i32) %var24)
  call void @llvm.stackrestore(ptr %var23)
  ; Slots 0-3: %var4 in every lane.
  store ptr %var4, ptr %var12
  %var33 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 1
  store ptr %var4, ptr %var33
  %var34 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 2
  store ptr %var4, ptr %var34
  %var35 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 3
  store ptr %var4, ptr %var35
  ; Slots 4-7: %var4 followed by three copies of %var5.
  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
  store ptr %var4, ptr %var36
  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
  store ptr %var5, ptr %var37
  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
  store ptr %var5, ptr %var38
  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
  store ptr %var5, ptr %var39
  ret void
}

; Four pointer stores to consecutive slots 4-7 of %var12, with no stacksave or
; calls involved: slot 4 receives %var4 and slots 5-7 receive %var5.  The
; expected output is one <4 x ptr> store whose value is built from %var4 and
; %var5 with a <0, 1, 1, 1> shuffle mask.
define void @spam() #1 {
; CHECK-LABEL: @spam(
; CHECK-NEXT:    [[VAR4:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR5:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[VAR12:%.*]] = alloca [12 x ptr], align 8
; CHECK-NEXT:    [[VAR36:%.*]] = getelementptr inbounds [12 x ptr], ptr [[VAR12]], i32 0, i32 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[VAR4]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[VAR5]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    store <4 x ptr> [[TMP3]], ptr [[VAR36]], align 8
; CHECK-NEXT:    ret void
;
  ; Stack objects; %var12 is the destination array for the stores below.
  %var4 = alloca i8
  %var5 = alloca i8
  %var12 = alloca [12 x ptr]
  ; Slots 4-7: %var4 followed by three copies of %var5.
  %var36 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 4
  store ptr %var4, ptr %var36
  %var37 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 5
  store ptr %var5, ptr %var37
  %var38 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 6
  store ptr %var5, ptr %var38
  %var39 = getelementptr inbounds [12 x ptr], ptr %var12, i32 0, i32 7
  store ptr %var5, ptr %var39
  ret void
}

; Attribute groups.  #1 is attached to @ham and @spam and gives them an
; explicit "target-features" string.  #0 is not referenced by any function or
; call visible in this file.
attributes #0 = { nofree nosync nounwind willreturn }
attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }