llvm/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=memcpyopt -S -verify-memoryssa | FileCheck %s

; 9-byte scratch buffer type; every temporary `alloca` in the tests below uses it.
%buf = type [9 x i8]

; The second `memcpy` reads 6 bytes starting at offset 1 of the temporary, i.e.
; bytes [1, 7), all of which were written by the first `memcpy` from `%src`.
; The expected output therefore forwards it to read from `%src + 1`.
; The forwarded copy is expected to be a `memmove` (presumably because `%dest`
; is not known to be disjoint from `%src` -- TODO confirm).
define void @forward_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[SRC_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Copy bytes [1, 7) of the temporary into %dest.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; The source alignment must be recomputed when forwarding: the first `memcpy`
; reads `%src` with align 4, but the forwarded source is `%src + 3`, so the
; expected output gives the forwarded source align 1.
define void @forward_offset_align(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_align(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 3
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill all 9 bytes of the temporary from the align-4 %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
  ; Copy bytes [3, 8) of the temporary into %dest.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 3
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; With an offset of 2 from the align-4 `%src`, the forwarded source `%src + 2`
; is expected to carry align 2, even though the original second `memcpy` read
; its source with align 1.
define void @forward_offset_align_2(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_align_2(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 4 [[SRC]], i64 9, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 2 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill all 9 bytes of the temporary from the align-4 %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 4 %src, i64 9, i1 false)
  ; Copy bytes [2, 8) of the temporary into %dest.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; If the copy destination can be used as the copy source, we don't need to
; create a GEP instruction. Here `%dest` is itself `%src + 1`, which is the
; address the forwarded source would have; the expected output contains no new
; GEP on `%src` and no second copy at all.
define void @forward_offset_without_gep(ptr %src) {
; CHECK-LABEL: define void @forward_offset_without_gep(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; The destination is %src + 1, matching the offset read from the temporary.
  %dest = getelementptr inbounds i8, ptr %src, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We need to create a GEP instruction when forwarding: `%dest` is `%src + 2`,
; which differs from the forwarded source address `%src + 1`, so the expected
; output contains a new GEP computing `%src + 1`.
define void @forward_offset_with_gep(ptr %src) {
; CHECK-LABEL: define void @forward_offset_with_gep(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[DEST:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  ; The destination is %src + 2, one byte past the offset read from the temporary.
  %dest = getelementptr inbounds i8, ptr %src, i64 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; Make sure we pass the right parameters when forwarding a plain `memcpy`:
; the expected output copies 6 bytes from `%src + 1` into `%dest`, and `%dest`
; is still passed to `@use` afterwards.
define void @forward_offset_memcpy(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_memcpy(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    call void @use(ptr [[DEST]])
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Copy bytes [1, 7) of the temporary into %dest.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ; Hand the copied-to destination to an external function.
  call void @use(ptr %dest)
  ret void
}

; Make sure we pass the right parameters when calling `memcpy.inline`.
; NOTE(review): the expected output below leaves the `memcpy.inline` reading
; from offset 1 of the temporary rather than from `%src`, i.e. no forwarding is
; expected for this case -- confirm this is the intended coverage.
define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_memcpy_inline(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 6, i1 false)
; CHECK-NEXT:    call void @use(ptr [[DEST]])
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Copy bytes [1, 7) of the temporary into %dest using the inline variant.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ; Hand the copied-to destination to an external function.
  call void @use(ptr %dest)
  ret void
}

; We cannot forward the second `memcpy` because it reads past what the first
; one wrote: the first `memcpy` fills only bytes [0, 6) of the temporary, while
; the second reads bytes [1, 7). The expected output leaves both copies as is.
define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @do_not_forward_oversize_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill only bytes [0, 6) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 6, i1 false)
  ; Read bytes [1, 7) of the temporary; byte 6 was not written by the copy above.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 6, i1 false)
  ret void
}

; We can forward `memcpy` because the intervening stores do not touch the bytes
; being copied: the stores write `%src[0]` and `%src[6]`, while the forwarded
; copy reads 5 bytes from `%src + 1`, i.e. `%src[1]` through `%src[5]`.
define void @forward_offset_and_store(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_offset_and_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[SRC]], align 1
; CHECK-NEXT:    [[DEP_SRC_END:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
; CHECK-NEXT:    store i8 1, ptr [[DEP_SRC_END]], align 1
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Overwrite the first and last source bytes covered by the copy above.
  store i8 1, ptr %src, align 1
  %src_end = getelementptr inbounds i8, ptr %src, i64 6
  store i8 1, ptr %src_end, align 1
  ; Copy bytes [1, 6) of the temporary into %dest; neither store hit this range.
  %cpy_tmp_offset  = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; We cannot forward `memcpy` because the intervening store writes `%src[1]`,
; which lies inside the range `%src[1]` through `%src[5]` that a forwarded copy
; would read. The expected output keeps the second copy reading the temporary.
; Also, make sure we have removed the GEP instruction that was created
; temporarily: the only GEP on `%src` in the expected output is the store's.
define void @do_not_forward_offset_and_store(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @do_not_forward_offset_and_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT:    [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 7, i1 false)
; CHECK-NEXT:    [[DEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    store i8 1, ptr [[DEP]], align 1
; CHECK-NEXT:    [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 5, i1 false)
; CHECK-NEXT:    ret void
;
  %cpy_tmp = alloca %buf, align 1
  ; Fill bytes [0, 7) of the temporary from %src.
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %cpy_tmp, ptr align 1 %src, i64 7, i1 false)
  ; Overwrite %src[1] after it has been copied into the temporary.
  %src_offset = getelementptr inbounds i8, ptr %src, i64 1
  store i8 1, ptr %src_offset, align 1
  ; Copy bytes [1, 6) of the temporary into %dest.
  %cpy_tmp_offset = getelementptr inbounds i8, ptr %cpy_tmp, i64 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %dest, ptr align 1 %cpy_tmp_offset, i64 5, i1 false)
  ret void
}

; Make sure we don't crash when the copy source is a constant.
; The first copy reads from the global `@buf`, so the forwarded source is a
; constant expression (`@buf + 10`) rather than an instruction.
@buf = external global [32 x i8]

; Regression test named after PR98675. The input's second transfer is a
; `memmove`; the expected output is a `memcpy` reading from `@buf + 10`.
define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
; CHECK-LABEL: define void @pr98675(
; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) {
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P1]], ptr @buf, i64 26, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 10
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P2]], ptr getelementptr inbounds (i8, ptr @buf, i64 10), i64 1, i1 false)
; CHECK-NEXT:    ret void
;
  ; Copy the first 26 bytes of @buf into %p1.
  call void @llvm.memcpy.p0.p0.i64(ptr %p1, ptr @buf, i64 26, i1 false)
  ; Move the single byte at %p1 + 10 into %p2.
  %gep = getelementptr i8, ptr %p1, i64 10
  call void @llvm.memmove.p0.p0.i64(ptr %p2, ptr %gep, i64 1, i1 false)
  ret void
}

; Opaque external function; some tests pass `%dest` to it after the copy.
declare void @use(ptr)

; Memory-transfer intrinsics called by the test inputs above. `llvm.memmove`
; is declared explicitly, like the other two, so that every intrinsic the
; inputs call has a visible declaration in this file.
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)