; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s
; Testing codegen for memcpy with vector operands for all combinations of the following parameters:
; destination address space: 0, 1, 3, 5
; source address space: 0, 1, 3, 4, 5
; alignment: 1, 2, 8, 16
; sizes: 16, 31, 32
; memcpy of 16 bytes, addrspace(0) -> addrspace(0), both pointers align 1.
; Expected lowering: one flat_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(0) -> addrspace(0), both pointers align 1.
; Expected lowering: four flat loads issued in one clause, then four flat
; stores, covering the 31 bytes as dwordx4 (offset 0), dwordx3 (offset 16),
; short (offset 28) and byte (offset 30).
define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30
; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28
; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(0) -> addrspace(0), both pointers align 1.
; Expected lowering: two flat_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(0) -> addrspace(0), both pointers align 2.
; Expected lowering: one flat_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(0) -> addrspace(0), both pointers align 2.
; Expected lowering: four flat loads issued in one clause, then four flat
; stores, covering the 31 bytes as dwordx4 (offset 0), dwordx3 (offset 16),
; short (offset 28) and byte (offset 30).
define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30
; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28
; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(0) -> addrspace(0), both pointers align 2.
; Expected lowering: two flat_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(0) -> addrspace(0), both pointers align 8.
; Expected lowering: one flat_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(0) -> addrspace(0), both pointers align 8.
; Expected lowering: two flat_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 15 and 0; the two 16-byte accesses overlap at byte 15.
define void @memcpy_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(0) -> addrspace(0), both pointers align 8.
; Expected lowering: two flat_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(0) -> addrspace(0), both pointers align 16.
; Expected lowering: one flat_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(0) -> addrspace(0), both pointers align 16.
; Expected lowering: two flat_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 15 and 0; the two 16-byte accesses overlap at byte 15.
define void @memcpy_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(0) -> addrspace(0), both pointers align 16.
; Expected lowering: two flat_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p0_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(1) -> addrspace(0), both pointers align 1.
; Expected lowering: one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(1) -> addrspace(0), both pointers align 1.
; Expected lowering: four global loads issued in one clause, then four flat
; stores, covering the 31 bytes as dwordx4 (offset 0), dwordx3 (offset 16),
; short (offset 28) and byte (offset 30).
define void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30
; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28
; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(1) -> addrspace(0), both pointers align 1.
; Expected lowering: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(1) -> addrspace(0), both pointers align 2.
; Expected lowering: one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(1) -> addrspace(0), both pointers align 2.
; Expected lowering: four global loads issued in one clause, then four flat
; stores, covering the 31 bytes as dwordx4 (offset 0), dwordx3 (offset 16),
; short (offset 28) and byte (offset 30).
define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30
; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28
; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(1) -> addrspace(0), both pointers align 2.
; Expected lowering: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(1) -> addrspace(0), both pointers align 8.
; Expected lowering: one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(1) -> addrspace(0), both pointers align 8.
; Expected lowering: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 15 and 0; the two 16-byte accesses overlap at byte 15.
define void @memcpy_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(1) -> addrspace(0), both pointers align 8.
; Expected lowering: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(1) -> addrspace(0), both pointers align 16.
; Expected lowering: one global_load_dwordx4 feeding one flat_store_dwordx4.
define void @memcpy_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(1) -> addrspace(0), both pointers align 16.
; Expected lowering: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 15 and 0; the two 16-byte accesses overlap at byte 15.
define void @memcpy_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(1) -> addrspace(0), both pointers align 16.
; Expected lowering: two global_load_dwordx4 / flat_store_dwordx4 pairs at
; offsets 16 and 0.
define void @memcpy_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p1_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(3) -> addrspace(0), both pointers align 1.
; Expected lowering: one ds_read2_b64 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(3) -> addrspace(0), both pointers align 1.
; Expected lowering: five ds_read instructions (u8 at 30, b32 at 24, u16 at 28,
; b64 at 16, read2_b64 at 0) feeding four flat stores: byte (offset 30),
; short (offset 28), dwordx3 (offset 16) and dwordx4 (offset 0).
define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_u8 v9, v2 offset:30
; CHECK-NEXT: ds_read_b32 v8, v2 offset:24
; CHECK-NEXT: ds_read_u16 v10, v2 offset:28
; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(3) -> addrspace(0), both pointers align 1.
; Expected lowering: two ds_read2_b64 instructions feeding two
; flat_store_dwordx4 instructions at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(3) -> addrspace(0), both pointers align 2.
; Expected lowering: one ds_read2_b64 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(3) -> addrspace(0), both pointers align 2.
; Expected lowering: five ds_read instructions (u8 at 30, b32 at 24, u16 at 28,
; b64 at 16, read2_b64 at 0) feeding four flat stores: byte (offset 30),
; short (offset 28), dwordx3 (offset 16) and dwordx4 (offset 0).
define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_u8 v9, v2 offset:30
; CHECK-NEXT: ds_read_b32 v8, v2 offset:24
; CHECK-NEXT: ds_read_u16 v10, v2 offset:28
; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(3) -> addrspace(0), both pointers align 2.
; Expected lowering: two ds_read2_b64 instructions feeding two
; flat_store_dwordx4 instructions at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(3) -> addrspace(0), both pointers align 8.
; Expected lowering: one ds_read2_b64 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(3) -> addrspace(0), both pointers align 8.
; Expected lowering: a ds_read_b128 at offset 15 and a ds_read2_b64 at offset 0
; feeding two flat_store_dwordx4 instructions at offsets 15 and 0; the two
; 16-byte stores overlap at byte 15.
define void @memcpy_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(3) -> addrspace(0), both pointers align 8.
; Expected lowering: two ds_read2_b64 instructions feeding two
; flat_store_dwordx4 instructions at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(3) -> addrspace(0), both pointers align 16.
; Expected lowering: one ds_read_b128 feeding one flat_store_dwordx4.
define void @memcpy_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(3) -> addrspace(0), both pointers align 16.
; Expected lowering: two ds_read_b128 instructions at offsets 15 and 0 feeding
; two flat_store_dwordx4 instructions at offsets 15 and 0; the two 16-byte
; accesses overlap at byte 15.
define void @memcpy_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15
; CHECK-NEXT: ds_read_b128 v[7:10], v2
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(3) -> addrspace(0), both pointers align 16.
; Expected lowering: two ds_read_b128 instructions feeding two
; flat_store_dwordx4 instructions at offsets 16 and 0.
define void @memcpy_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p3_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16
; CHECK-NEXT: ds_read_b128 v[7:10], v2
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(4) -> addrspace(0), both pointers align 1.
; Expected lowering: two interleaved global_load_dwordx2 / flat_store_dwordx2
; pairs at offsets 0 and 8; the second load is issued after the first store.
define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; memcpy of 31 bytes, addrspace(4) -> addrspace(0), both pointers align 1.
; Expected lowering: six interleaved global load / flat store pairs, each load
; issued after the previous store: dwordx2 at offsets 0, 8 and 16, dword at
; offset 24, short at offset 28 and byte at offset 30.
define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24
; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28
; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; memcpy of 32 bytes, addrspace(4) -> addrspace(0), both pointers align 1.
; Expected lowering: four interleaved global_load_dwordx2 / flat_store_dwordx2
; pairs at offsets 0, 8, 16 and 24, each load issued after the previous store.
define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; memcpy of 16 bytes, addrspace(4) -> addrspace(0), both pointers align 2.
; Expected lowering: two interleaved global_load_dwordx2 / flat_store_dwordx2
; pairs at offsets 0 and 8; the second load is issued after the first store.
define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(4) to addrspace(0); both pointers are 2-byte aligned.
; Expected code: serialized global_load / flat_store pairs of 8, 8, 8, 4, 2
; and 1 bytes at offsets 0, 8, 16, 24, 28 and 30 (no overlapping accesses).
define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24
; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28
; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(4) to addrspace(0); both pointers are 2-byte aligned.
; Expected code: four serialized 8-byte global_load / flat_store pairs at
; offsets 0, 8, 16 and 24.
define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8
; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(4) to addrspace(0); both pointers are 8-byte aligned.
; Expected code: a single 16-byte global_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(4) to addrspace(0); both pointers are 8-byte aligned.
; Expected code: two 16-byte global_load / flat_store pairs at offsets 0 and 15;
; the second access overlaps the first at byte 15 so the 31 bytes are covered
; without a narrower tail access.
define void @memcpy_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(4) to addrspace(0); both pointers are 8-byte aligned.
; Expected code: two serialized 16-byte global_load / flat_store pairs at
; offsets 0 and 16.
define void @memcpy_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(4) to addrspace(0); both pointers are 16-byte aligned.
; Expected code: a single 16-byte global_load_dwordx4 / flat_store_dwordx4 pair.
define void @memcpy_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(4) to addrspace(0); both pointers are 16-byte aligned.
; Expected code: two 16-byte global_load / flat_store pairs at offsets 0 and 15;
; the second access overlaps the first at byte 15.
define void @memcpy_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(4) to addrspace(0); both pointers are 16-byte aligned.
; Expected code: two serialized 16-byte global_load / flat_store pairs at
; offsets 0 and 16.
define void @memcpy_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p4_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(5) to addrspace(0); both pointers are 1-byte aligned.
; Expected code: four 4-byte buffer_load_dword instructions grouped under one
; s_clause, whose results are written with a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(5) to addrspace(0); both pointers are 1-byte aligned.
; Expected code: nine buffer loads under one s_clause (seven dwords, one ushort
; at offset 28, one ubyte at offset 30), then flat stores of 2 bytes at 28,
; 1 byte at 30, 12 bytes at 16 and 16 bytes at 0 (no overlapping accesses).
define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x8
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(5) to addrspace(0); both pointers are 1-byte aligned.
; Expected code: eight buffer_load_dword instructions under one s_clause
; (offsets 16..28 first, then 0..12), then two flat_store_dwordx4 at offsets
; 16 and 0.
define void @memcpy_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(5) to addrspace(0); both pointers are 2-byte aligned.
; Expected code: four buffer_load_dword instructions under one s_clause,
; written back with a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(5) to addrspace(0); both pointers are 2-byte aligned.
; Expected code: nine buffer loads under one s_clause (seven dwords, one ushort
; at offset 28, one ubyte at offset 30), then flat stores of 2 bytes at 28,
; 1 byte at 30, 12 bytes at 16 and 16 bytes at 0.
define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x8
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30
; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(5) to addrspace(0); both pointers are 2-byte aligned.
; Expected code: eight buffer_load_dword instructions under one s_clause
; (offsets 16..28 first, then 0..12), then two flat_store_dwordx4 at offsets
; 16 and 0.
define void @memcpy_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(5) to addrspace(0); both pointers are 8-byte aligned.
; Expected code: four buffer_load_dword instructions under one s_clause,
; written back with a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(5) to addrspace(0); both pointers are 8-byte aligned.
; Expected code: eight buffer_load_dword instructions under one s_clause, the
; upper four at offsets 15, 19, 23 and 27, then two flat_store_dwordx4 at
; offsets 0 and 15; the second 16-byte group overlaps the first at byte 15.
define void @memcpy_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(5) to addrspace(0); both pointers are 8-byte aligned.
; Expected code: eight buffer_load_dword instructions under one s_clause at
; offsets 0..28, then two flat_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(5) to addrspace(0); both pointers are 16-byte aligned.
; Expected code: four buffer_load_dword instructions under one s_clause,
; written back with a single flat_store_dwordx4.
define void @memcpy_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(5) to addrspace(0); both pointers are 16-byte aligned.
; Expected code: eight buffer_load_dword instructions under one s_clause, the
; upper four at offsets 15, 19, 23 and 27, then two flat_store_dwordx4 at
; offsets 0 and 15; the second 16-byte group overlaps the first at byte 15.
define void @memcpy_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(5) to addrspace(0); both pointers are 16-byte aligned.
; Expected code: eight buffer_load_dword instructions under one s_clause at
; offsets 0..28, then two flat_store_dwordx4 at offsets 0 and 16.
define void @memcpy_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p0_p5_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(0) to addrspace(1); both pointers are 1-byte aligned.
; Expected code: a single flat_load_dwordx4 followed by a global_store_dwordx4.
define void @memcpy_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(0) to addrspace(1); both pointers are 1-byte aligned.
; Expected code: three flat loads under one s_clause (8 bytes at offset 23,
; 8 bytes at offset 16, 16 bytes at offset 0) with matching global stores; the
; offset-23 access overlaps the offset-16 access at byte 23.
define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23
; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(0) to addrspace(1); both pointers are 1-byte aligned.
; Expected code: two flat_load_dwordx4 under one s_clause (offset 16, then 0)
; followed by two global_store_dwordx4 in the same order.
define void @memcpy_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(0) to addrspace(1); both pointers are 2-byte aligned.
; Expected code: a single flat_load_dwordx4 followed by a global_store_dwordx4.
define void @memcpy_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(0) to addrspace(1); both pointers are 2-byte aligned.
; Expected code: three flat loads under one s_clause (8 bytes at offset 23,
; 8 bytes at offset 16, 16 bytes at offset 0) with matching global stores; the
; offset-23 access overlaps the offset-16 access at byte 23.
define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23
; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(0) to addrspace(1); both pointers are 2-byte aligned.
; Expected code: two flat_load_dwordx4 under one s_clause (offset 16, then 0)
; followed by two global_store_dwordx4 in the same order.
define void @memcpy_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(0) to addrspace(1); both pointers are 8-byte aligned.
; Expected code: a single flat_load_dwordx4 followed by a global_store_dwordx4.
define void @memcpy_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(0) to addrspace(1); both pointers are 8-byte aligned.
; Expected code: two flat_load_dwordx4 under one s_clause (offset 15, then 0)
; with matching global stores; the offset-15 access overlaps the offset-0
; access at byte 15.
define void @memcpy_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(0) to addrspace(1); both pointers are 8-byte aligned.
; Expected code: two flat_load_dwordx4 under one s_clause (offset 16, then 0)
; followed by two global_store_dwordx4 in the same order.
define void @memcpy_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(0) to addrspace(1); both pointers are 16-byte aligned.
; Expected code: a single flat_load_dwordx4 followed by a global_store_dwordx4.
define void @memcpy_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(0) to addrspace(1); both pointers are 16-byte aligned.
; Expected code: two flat_load_dwordx4 under one s_clause (offset 15, then 0)
; with matching global stores; the offset-15 access overlaps the offset-0
; access at byte 15.
define void @memcpy_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(0) to addrspace(1); both pointers are 16-byte aligned.
; Expected code: two flat_load_dwordx4 under one s_clause (offset 16, then 0)
; followed by two global_store_dwordx4 in the same order.
define void @memcpy_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p0_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(1) to addrspace(1); both pointers are 1-byte aligned.
; Expected code: a single global_load_dwordx4 followed by a global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; Copies 31 bytes from addrspace(1) to addrspace(1); both pointers are 1-byte aligned.
; Expected code: three global loads under one s_clause (8 bytes at offset 23,
; 16 bytes at offset 0, 8 bytes at offset 16) with matching global stores; the
; offset-23 access overlaps the offset-16 access at byte 23.
define void @memcpy_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; Copies 32 bytes from addrspace(1) to addrspace(1); both pointers are 1-byte aligned.
; Expected code: two global_load_dwordx4 under one s_clause (offset 16, then 0)
; followed by two global_store_dwordx4 in the same order.
define void @memcpy_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; Copies 16 bytes from addrspace(1) to addrspace(1); both pointers are 2-byte aligned.
; Expected code: a single global_load_dwordx4 followed by a global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: three global loads in one s_clause (dwordx2 at offset 23,
; dwordx4 at offset 0, dwordx2 at offset 16), then the three matching global stores,
; each preceded by an s_waitcnt on vmcnt. The two dwordx2 accesses at offsets 16 and
; 23 share byte 23, so together they cover bytes 16..30.
define void @memcpy_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: a single global_load_dwordx4 followed by a single
; global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 15 and 0) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
; The two 16-byte accesses share byte 15, so together they cover bytes 0..30.
define void @memcpy_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: a single global_load_dwordx4 followed by a single
; global_store_dwordx4.
define void @memcpy_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 15 and 0) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
; The two 16-byte accesses share byte 15, so together they cover bytes 0..30.
define void @memcpy_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(1) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 16 and 0) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p1_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: a single ds_read_b128, an s_waitcnt on lgkmcnt, then a
; single global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: three DS reads (ds_read_b64 at offset 0, ds_read_b128 at
; offset 8, ds_read_b64 at offset 23), then global stores of matching width at the
; same offsets, each preceded by an s_waitcnt on lgkmcnt. The accesses at offsets 8
; and 23 share byte 23.
define void @memcpy_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b64 v[7:8], v2
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8
; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: two ds_read_b128 (offsets 0 and 16), then two
; global_store_dwordx4 at the same offsets, each preceded by an s_waitcnt on lgkmcnt.
define void @memcpy_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: a single ds_read_b128, an s_waitcnt on lgkmcnt, then a
; single global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: three DS reads (ds_read_b64 at offset 0, ds_read_b128 at
; offset 8, ds_read_b64 at offset 23), then global stores of matching width at the
; same offsets, each preceded by an s_waitcnt on lgkmcnt. The accesses at offsets 8
; and 23 share byte 23.
define void @memcpy_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b64 v[7:8], v2
; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8
; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: two ds_read_b128 (offsets 0 and 16), then two
; global_store_dwordx4 at the same offsets, each preceded by an s_waitcnt on lgkmcnt.
define void @memcpy_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: a single ds_read2_b64 (offset1:1), an s_waitcnt on lgkmcnt,
; then a single global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: a ds_read2_b64 (offset1:1) and a ds_read_b128 at offset 15,
; then two global_store_dwordx4 at offsets 0 and 15, each preceded by an s_waitcnt
; on lgkmcnt. The two 16-byte stores share byte 15.
define void @memcpy_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: two ds_read2_b64 (offset1:1, then offset0:2 offset1:3),
; then two global_store_dwordx4 at offsets 0 and 16, each preceded by an s_waitcnt
; on lgkmcnt.
define void @memcpy_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1
; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: a single ds_read_b128, an s_waitcnt on lgkmcnt, then a
; single global_store_dwordx4.
define void @memcpy_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: two ds_read_b128 (offsets 0 and 15), then two
; global_store_dwordx4 at the same offsets, each preceded by an s_waitcnt on lgkmcnt.
; The two 16-byte accesses share byte 15, so together they cover bytes 0..30.
define void @memcpy_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(3) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: two ds_read_b128 (offsets 0 and 16), then two
; global_store_dwordx4 at the same offsets, each preceded by an s_waitcnt on lgkmcnt.
define void @memcpy_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p3_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[3:6], v2
; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: a single global_load_dwordx4 followed by a single
; global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: two global loads in one s_clause (dwordx2 at offset 0,
; dwordx4 at offset 8) and their stores, then a third dwordx2 load at offset 23 that
; is issued only after those two stores, followed by its own store. The accesses at
; offsets 8 and 23 share byte 23.
define void @memcpy_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: a single global_load_dwordx4 followed by a single
; global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: two global loads in one s_clause (dwordx2 at offset 0,
; dwordx4 at offset 8) and their stores, then a third dwordx2 load at offset 23 that
; is issued only after those two stores, followed by its own store. The accesses at
; offsets 8 and 23 share byte 23.
define void @memcpy_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: two global_load_dwordx4 (offsets 0 and 16) in one s_clause,
; then the two matching global_store_dwordx4, each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: a single global_load_dwordx4 followed by a single
; global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: no s_clause; a global_load_dwordx4 at offset 0 and its
; store, then a second global_load_dwordx4 at offset 15 and its store. The two
; 16-byte accesses share byte 15, so together they cover bytes 0..30.
define void @memcpy_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: no s_clause; a global_load_dwordx4 at offset 0 and its
; store, then a second global_load_dwordx4 at offset 16 and its store.
define void @memcpy_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: a single global_load_dwordx4 followed by a single
; global_store_dwordx4.
define void @memcpy_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: no s_clause; a global_load_dwordx4 at offset 0 and its
; store, then a second global_load_dwordx4 at offset 15 and its store. The two
; 16-byte accesses share byte 15, so together they cover bytes 0..30.
define void @memcpy_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(4) to addrspace(1); %dst and %src are both align 16.
; Expected gfx1030 code: no s_clause; a global_load_dwordx4 at offset 0 and its
; store, then a second global_load_dwordx4 at offset 16 and its store.
define void @memcpy_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p4_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: four buffer_load_dword ... offen (offsets 0, 4, 8, 12) in
; one s_clause, then a single global_store_dwordx4 of the four loaded registers.
define void @memcpy_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: eight buffer_load_dword ... offen in one s_clause at
; offsets 0, 4, 8, 12, 16, 20, 23, 27, then three global stores (dwordx4 at offset 0,
; dwordx2 at offset 16, dwordx2 at offset 23). The accesses at offsets 20 and 23
; overlap in byte 23.
define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 1.
; Expected gfx1030 code: eight buffer_load_dword ... offen in one s_clause at
; offsets 0 through 28 in steps of 4, then two global_store_dwordx4 (offsets 0 and
; 16), each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: four buffer_load_dword ... offen (offsets 0, 4, 8, 12) in
; one s_clause, then a single global_store_dwordx4 of the four loaded registers.
define void @memcpy_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: eight buffer_load_dword ... offen in one s_clause at
; offsets 0, 4, 8, 12, 16, 20, 23, 27, then three global stores (dwordx4 at offset 0,
; dwordx2 at offset 16, dwordx2 at offset 23). The accesses at offsets 20 and 23
; overlap in byte 23.
define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 2.
; Expected gfx1030 code: eight buffer_load_dword ... offen in one s_clause at
; offsets 0 through 28 in steps of 4, then two global_store_dwordx4 (offsets 0 and
; 16), each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; Test: 16-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: four buffer_load_dword ... offen (offsets 0, 4, 8, 12) in
; one s_clause, then a single global_store_dwordx4 of the four loaded registers.
define void @memcpy_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; Test: 31-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: eight buffer_load_dword ... offen in one s_clause at
; offsets 0, 4, 8, 12, 15, 19, 23, 27, then two global_store_dwordx4 (offsets 0 and
; 15), each preceded by an s_waitcnt on vmcnt. The accesses at offsets 12 and 15
; overlap in byte 15.
define void @memcpy_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; Test: 32-byte llvm.memcpy from addrspace(5) to addrspace(1); %dst and %src are both align 8.
; Expected gfx1030 code: eight buffer_load_dword ... offen in one s_clause at
; offsets 0 through 28 in steps of 4, then two global_store_dwordx4 (offsets 0 and
; 16), each preceded by an s_waitcnt on vmcnt.
define void @memcpy_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(5) to addrspace(1), both pointers align 16.
; Expected code: four dword buffer loads followed by one 16-byte global store.
define void @memcpy_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(5) to addrspace(1), both pointers align 16.
; Expected code: eight dword buffer loads and two 16-byte global stores; the
; second 16-byte half sits at offset 15, overlapping the first half by one byte.
define void @memcpy_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(5) to addrspace(1), both pointers align 16.
; Expected code: eight dword buffer loads (offsets 0..28) and two 16-byte
; global stores at offsets 0 and 16.
define void @memcpy_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p1_p5_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(0) to addrspace(3), both pointers align 1.
; Expected code: one 16-byte flat load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(0) to addrspace(3), both pointers align 1.
; Expected code splits the copy into 16 + 8 + 8 bytes; the last 8-byte piece
; is at offset 23, overlapping the piece at offset 16 by one byte.
define void @memcpy_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23
; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(0) to addrspace(3), both pointers align 1.
; Expected code: two 16-byte flat loads (offsets 16 and 0), each stored with
; a ds_write2_b64.
define void @memcpy_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(0) to addrspace(3), both pointers align 2.
; Expected code: one 16-byte flat load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(0) to addrspace(3), both pointers align 2.
; Expected code splits the copy into 16 + 8 + 8 bytes; the last 8-byte piece
; is at offset 23, overlapping the piece at offset 16 by one byte.
define void @memcpy_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23
; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(0) to addrspace(3), both pointers align 2.
; Expected code: two 16-byte flat loads (offsets 16 and 0), each stored with
; a ds_write2_b64.
define void @memcpy_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(0) to addrspace(3), both pointers align 8.
; Expected code: one 16-byte flat load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(0) to addrspace(3), both pointers align 8.
; Expected code: two 16-byte flat loads at offsets 0 and 15 (one byte of
; overlap), stored with a ds_write2_b64 and a ds_write_b128 at offset 15.
define void @memcpy_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2]
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(0) to addrspace(3), both pointers align 8.
; Expected code: two 16-byte flat loads (offsets 16 and 0), each stored with
; a ds_write2_b64.
define void @memcpy_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(0) to addrspace(3), both pointers align 16.
; Expected code: one 16-byte flat load and a single ds_write_b128.
define void @memcpy_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(0) to addrspace(3), both pointers align 16.
; Expected code: two 16-byte flat loads at offsets 15 and 0 (one byte of
; overlap), each stored with a ds_write_b128 at the matching offset.
define void @memcpy_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(0) to addrspace(3), both pointers align 16.
; Expected code: two 16-byte flat loads (offsets 16 and 0), each stored with
; a ds_write_b128.
define void @memcpy_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p0_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[7:10]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(1) to addrspace(3), both pointers align 1.
; Expected code: one 16-byte global load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(1) to addrspace(3), both pointers align 1.
; Expected code splits the copy into 16 + 8 + 8 bytes; the last 8-byte piece
; is at offset 23, overlapping the piece at offset 16 by one byte.
define void @memcpy_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(1) to addrspace(3), both pointers align 1.
; Expected code: two 16-byte global loads (offsets 0 and 16), each stored
; with a ds_write2_b64.
define void @memcpy_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(1) to addrspace(3), both pointers align 2.
; Expected code: one 16-byte global load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(1) to addrspace(3), both pointers align 2.
; Expected code splits the copy into 16 + 8 + 8 bytes; the last 8-byte piece
; is at offset 23, overlapping the piece at offset 16 by one byte.
define void @memcpy_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(1) to addrspace(3), both pointers align 2.
; Expected code: two 16-byte global loads (offsets 0 and 16), each stored
; with a ds_write2_b64.
define void @memcpy_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(1) to addrspace(3), both pointers align 8.
; Expected code: one 16-byte global load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(1) to addrspace(3), both pointers align 8.
; Expected code: two 16-byte global loads at offsets 0 and 15 (one byte of
; overlap), stored with a ds_write2_b64 and a ds_write_b128 at offset 15.
define void @memcpy_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(1) to addrspace(3), both pointers align 8.
; Expected code: two 16-byte global loads (offsets 0 and 16), each stored
; with a ds_write2_b64.
define void @memcpy_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(1) to addrspace(3), both pointers align 16.
; Expected code: one 16-byte global load and a single ds_write_b128.
define void @memcpy_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(1) to addrspace(3), both pointers align 16.
; Expected code: two 16-byte global loads at offsets 0 and 15 (one byte of
; overlap), each stored with a ds_write_b128 at the matching offset.
define void @memcpy_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(1) to addrspace(3), both pointers align 16.
; Expected code: two 16-byte global loads (offsets 0 and 16), each stored
; with a ds_write_b128.
define void @memcpy_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p1_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy within addrspace(3), both pointers align 1.
; Expected code: one ds_read2_b64 followed by one ds_write2_b64.
define void @memcpy_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy within addrspace(3), both pointers align 1.
; Expected code splits the copy into 8 + 8 + 16 bytes; the 8-byte piece at
; offset 23 overlaps the piece at offset 16 by one byte.
define void @memcpy_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23
; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy within addrspace(3), both pointers align 1.
; Expected code: two ds_read2_b64 (upper 16 bytes first), each followed by a
; matching ds_write2_b64.
define void @memcpy_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy within addrspace(3), both pointers align 2.
; Expected code: one ds_read2_b64 followed by one ds_write2_b64.
define void @memcpy_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy within addrspace(3), both pointers align 2.
; Expected code splits the copy into 8 + 8 + 16 bytes; the 8-byte piece at
; offset 23 overlaps the piece at offset 16 by one byte.
define void @memcpy_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23
; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy within addrspace(3), both pointers align 2.
; Expected code: two ds_read2_b64 (upper 16 bytes first), each followed by a
; matching ds_write2_b64.
define void @memcpy_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy within addrspace(3), both pointers align 8.
; Expected code: one ds_read2_b64 followed by one ds_write2_b64.
define void @memcpy_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy within addrspace(3), both pointers align 8.
; Expected code: a ds_read2_b64 for the first 16 bytes and a ds_read_b128 at
; offset 15 (one byte of overlap), written back with the same pair of widths.
define void @memcpy_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy within addrspace(3), both pointers align 8.
; Expected code: two ds_read2_b64 (upper 16 bytes first), each followed by a
; matching ds_write2_b64.
define void @memcpy_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy within addrspace(3), both pointers align 16.
; Expected code: a single ds_read_b128 followed by a single ds_write_b128.
define void @memcpy_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[1:4], v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy within addrspace(3), both pointers align 16.
; Expected code: two ds_read_b128 at offsets 15 and 0 (one byte of overlap),
; each written back with a ds_write_b128 at the matching offset.
define void @memcpy_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:15
; CHECK-NEXT: ds_read_b128 v[6:9], v1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[6:9]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy within addrspace(3), both pointers align 16.
; Expected code: two ds_read_b128 (offsets 16 and 0), each written back with
; a ds_write_b128 at the matching offset.
define void @memcpy_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p3_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:16
; CHECK-NEXT: ds_read_b128 v[6:9], v1
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[6:9]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(4) to addrspace(3), both pointers align 1.
; Expected code: one 16-byte global load and one ds_write2_b64 covering 16 bytes.
define void @memcpy_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(4) to addrspace(3), both pointers align 1.
; Expected code splits the copy into 16 + 8 + 8 bytes; the last 8-byte piece
; is at offset 23, overlapping the piece at offset 16 by one byte.
define void @memcpy_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 1.
; The assertions expect two global_load_dwordx4 (offsets 0 and 16), each
; stored with one ds_write2_b64.
define void @memcpy_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 2.
; The assertions expect a single global_load_dwordx4 and one ds_write2_b64
; that stores both 64-bit halves.
define void @memcpy_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 2.
; The assertions expect three pieces: 16 bytes at offset 0, 8 bytes at
; offset 16, and 8 bytes at offset 23. The last piece overlaps the previous
; one at byte 23 so that the copy ends exactly at byte 30.
define void @memcpy_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x2
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16
; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 2.
; The assertions expect two global_load_dwordx4 (offsets 0 and 16), each
; stored with one ds_write2_b64.
define void @memcpy_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 8.
; The assertions expect a single global_load_dwordx4 and one ds_write2_b64
; that stores both 64-bit halves.
define void @memcpy_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 8.
; The assertions expect two 16-byte pieces at offsets 0 and 15; the second
; piece overlaps the first at byte 15 and is stored with a ds_write_b128 at
; the odd offset 15.
define void @memcpy_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 8.
; The assertions expect two global_load_dwordx4 (offsets 0 and 16), each
; stored with one ds_write2_b64.
define void @memcpy_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 16.
; The assertions expect a single global_load_dwordx4 stored with a single
; ds_write_b128.
define void @memcpy_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[1:4]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 16.
; The assertions expect two 16-byte pieces at offsets 0 and 15, each loaded
; with global_load_dwordx4 and stored with ds_write_b128; the second piece
; overlaps the first at byte 15.
define void @memcpy_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(4) -> addrspace(3), both pointers align 16.
; The assertions expect two global_load_dwordx4 (offsets 0 and 16), each
; stored with a ds_write_b128 at the matching offset.
define void @memcpy_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p4_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: ds_write_b128 v0, v[3:6]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 1.
; The assertions expect the source to be read as four buffer_load_dword
; (offsets 0, 4, 8, 12) and written with one ds_write2_b64.
define void @memcpy_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 1.
; The assertions expect eight buffer_load_dword at offsets
; 0, 4, 8, 12, 16, 20, 23, 27 and three stores (16 bytes at 0, 8 bytes at 16,
; 8 bytes at 23). The final 8-byte piece overlaps the previous one at byte 23.
define void @memcpy_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 1.
; The assertions expect eight buffer_load_dword at offsets 0 through 28 in
; steps of 4, written with two ds_write2_b64.
define void @memcpy_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 2.
; The assertions expect the source to be read as four buffer_load_dword
; (offsets 0, 4, 8, 12) and written with one ds_write2_b64.
define void @memcpy_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 2.
; The assertions expect eight buffer_load_dword at offsets
; 0, 4, 8, 12, 16, 20, 23, 27 and three stores (16 bytes at 0, 8 bytes at 16,
; 8 bytes at 23). The final 8-byte piece overlaps the previous one at byte 23.
define void @memcpy_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 2.
; The assertions expect eight buffer_load_dword at offsets 0 through 28 in
; steps of 4, written with two ds_write2_b64.
define void @memcpy_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 8.
; The assertions expect the source to be read as four buffer_load_dword
; (offsets 0, 4, 8, 12) and written with one ds_write2_b64.
define void @memcpy_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 8.
; The assertions expect eight buffer_load_dword at offsets
; 0, 4, 8, 12, 15, 19, 23, 27, i.e. two 16-byte pieces at 0 and 15 that
; overlap at byte 15. The second piece is stored with ds_write_b128 at
; offset 15.
define void @memcpy_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 8.
; The assertions expect eight buffer_load_dword at offsets 0 through 28 in
; steps of 4, written with two ds_write2_b64.
define void @memcpy_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 16.
; The assertions expect four buffer_load_dword (offsets 0, 4, 8, 12) stored
; with a single ds_write_b128.
define void @memcpy_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[2:5]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 16.
; The assertions expect eight buffer_load_dword at offsets
; 0, 4, 8, 12, 15, 19, 23, 27 and two ds_write_b128 at offsets 0 and 15; the
; two 16-byte pieces overlap at byte 15.
define void @memcpy_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write_b128 v0, v[2:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(5) -> addrspace(3), both pointers align 16.
; The assertions expect eight buffer_load_dword at offsets 0 through 28 in
; steps of 4, written with two ds_write_b128 at offsets 0 and 16.
define void @memcpy_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p3_p5_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: ds_write_b128 v0, v[2:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 1.
; The assertions expect one flat_load_dwordx4 followed by four
; buffer_store_dword at offsets 12, 8, 4, 0.
define void @memcpy_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 1.
; The assertions expect four non-overlapping flat loads: 1 byte at offset 30,
; 2 bytes at 28, 12 bytes at 16, and 16 bytes at 0. These are written back as
; a byte store, a short store, and seven dword stores at the same offsets.
define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30
; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28
; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 1.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by
; eight buffer_store_dword, emitted from offset 28 down to 0.
define void @memcpy_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 2.
; The assertions expect one flat_load_dwordx4 followed by four
; buffer_store_dword at offsets 12, 8, 4, 0.
define void @memcpy_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 2.
; The assertions expect four non-overlapping flat loads: 1 byte at offset 30,
; 2 bytes at 28, 12 bytes at 16, and 16 bytes at 0. These are written back as
; a byte store, a short store, and seven dword stores at the same offsets.
define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30
; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28
; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 2.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by
; eight buffer_store_dword, emitted from offset 28 down to 0.
define void @memcpy_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 8.
; The assertions expect one flat_load_dwordx4 followed by four
; buffer_store_dword at offsets 12, 8, 4, 0.
define void @memcpy_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 8.
; The assertions expect two flat_load_dwordx4 at offsets 15 and 0 (the two
; 16-byte pieces overlap at byte 15), stored as eight buffer_store_dword at
; offsets 27, 23, 19, 15 and 12, 8, 4, 0.
define void @memcpy_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 8.
; The assertions expect two flat_load_dwordx4 (offsets 16 and 0) followed by
; eight buffer_store_dword, emitted from offset 28 down to 0.
define void @memcpy_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy, addrspace(0) -> addrspace(5), both pointers align 16.
; The assertions expect one flat_load_dwordx4 followed by four
; buffer_store_dword at offsets 12, 8, 4, 0.
define void @memcpy_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(0) -> addrspace(5), both pointers align 16.
; Expected shape: two overlapping 16-byte flat loads (bytes 15..30 and 0..15),
; so the upper group of dword stores sits at offsets 15, 19, 23 and 27 while
; the lower group covers offsets 0..12.
define void @memcpy_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(0) -> addrspace(5), both pointers align 16.
; Expected shape: two flat_load_dwordx4 (offsets 16 and 0) under one s_clause,
; then eight buffer_store_dword covering offsets 0..28, upper half first.
define void @memcpy_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p0_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16
; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2]
; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(1) -> addrspace(5), both pointers align 1.
; Expected shape: one global_load_dwordx4, then four buffer_store_dword
; emitted in the order offset 4, 0, 12, 8.
define void @memcpy_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(1) -> addrspace(5), both pointers align 1.
; Expected shape: the source is read as 16 + 12 + 2 + 1 bytes (dwordx4 at 0,
; dwordx3 at 16, ushort at 28, ubyte at 30) under one s_clause; the stores are
; seven dwords plus a short at offset 28 and a byte at offset 30.
define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(1) -> addrspace(5), both pointers align 1.
; Expected shape: two global_load_dwordx4 (offsets 0 and 16) under one
; s_clause, then eight buffer_store_dword; the lower 16 bytes are stored
; after vmcnt(1), the upper 16 bytes after vmcnt(0).
define void @memcpy_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(1) -> addrspace(5), both pointers align 2.
; Expected shape: one global_load_dwordx4, then four buffer_store_dword
; emitted in the order offset 4, 0, 12, 8.
define void @memcpy_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(1) -> addrspace(5), both pointers align 2.
; Expected shape: the source is read as 16 + 12 + 2 + 1 bytes (dwordx4 at 0,
; dwordx3 at 16, ushort at 28, ubyte at 30) under one s_clause; the stores are
; seven dwords plus a short at offset 28 and a byte at offset 30.
define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(1) -> addrspace(5), both pointers align 2.
; Expected shape: two global_load_dwordx4 (offsets 0 and 16) under one
; s_clause, then eight buffer_store_dword; the lower 16 bytes are stored
; after vmcnt(1), the upper 16 bytes after vmcnt(0).
define void @memcpy_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(1) -> addrspace(5), both pointers align 8.
; Expected shape: one global_load_dwordx4, then four buffer_store_dword
; emitted in descending offset order (12, 8, 4, 0).
define void @memcpy_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(1) -> addrspace(5), both pointers align 8.
; Expected shape: two overlapping 16-byte global loads (bytes 0..15 and
; 15..30), so the second group of dword stores sits at offsets 15, 19, 23
; and 27 while the first group covers offsets 0..12.
define void @memcpy_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(1) -> addrspace(5), both pointers align 8.
; Expected shape: two global_load_dwordx4 (offsets 0 and 16) under one
; s_clause, then eight buffer_store_dword; each group of four is emitted in
; descending offset order.
define void @memcpy_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(1) -> addrspace(5), both pointers align 16.
; Expected shape: one global_load_dwordx4, then four buffer_store_dword
; emitted in descending offset order (12, 8, 4, 0).
define void @memcpy_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(1) -> addrspace(5), both pointers align 16.
; Expected shape: two overlapping 16-byte global loads (bytes 0..15 and
; 15..30), so the second group of dword stores sits at offsets 15, 19, 23
; and 27 while the first group covers offsets 0..12.
define void @memcpy_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(1) -> addrspace(5), both pointers align 16.
; Expected shape: two global_load_dwordx4 (offsets 0 and 16) under one
; s_clause, then eight buffer_store_dword; each group of four is emitted in
; descending offset order.
define void @memcpy_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p1_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(3) -> addrspace(5), both pointers align 1.
; Expected shape: one ds_read2_b64 filling v[1:4], then four
; buffer_store_dword emitted in the order offset 4, 0, 12, 8.
define void @memcpy_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(3) -> addrspace(5), both pointers align 1.
; Expected shape: five DS reads (b32 at 24, u16 at 28, u8 at 30, ds_read2_b64
; for the first 16 bytes, b64 at 16); the stores are released one lgkmcnt
; step at a time as seven dwords, a short at offset 28 and a byte at
; offset 30.
define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b32 v8, v1 offset:24
; CHECK-NEXT: ds_read_u16 v9, v1 offset:28
; CHECK-NEXT: ds_read_u8 v10, v1 offset:30
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(3) -> addrspace(5), both pointers align 1.
; Expected shape: two ds_read2_b64 (filling v[2:5] and v[6:9]), then eight
; buffer_store_dword; the lower 16 bytes are stored after lgkmcnt(1), the
; upper 16 bytes after lgkmcnt(0).
define void @memcpy_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(3) -> addrspace(5), both pointers align 2.
; Expected shape: one ds_read2_b64 filling v[1:4], then four
; buffer_store_dword emitted in the order offset 4, 0, 12, 8.
define void @memcpy_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(3) -> addrspace(5), both pointers align 2.
; Expected shape: five DS reads (b32 at 24, u16 at 28, u8 at 30, ds_read2_b64
; for the first 16 bytes, b64 at 16); the stores are released one lgkmcnt
; step at a time as seven dwords, a short at offset 28 and a byte at
; offset 30.
define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b32 v8, v1 offset:24
; CHECK-NEXT: ds_read_u16 v9, v1 offset:28
; CHECK-NEXT: ds_read_u8 v10, v1 offset:30
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(4)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt lgkmcnt(3)
; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt lgkmcnt(2)
; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(3) -> addrspace(5), both pointers align 2.
; Expected shape: two ds_read2_b64 (filling v[2:5] and v[6:9]), then eight
; buffer_store_dword; the lower 16 bytes are stored after lgkmcnt(1), the
; upper 16 bytes after lgkmcnt(0).
define void @memcpy_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(3) -> addrspace(5), both pointers align 8.
; Expected shape: one ds_read2_b64 filling v[1:4], then four
; buffer_store_dword emitted in descending offset order (12, 8, 4, 0).
define void @memcpy_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(3) -> addrspace(5), both pointers align 8.
; Expected shape: two overlapping 16-byte reads (ds_read2_b64 for bytes 0..15,
; ds_read_b128 at offset 15 for bytes 15..30), so the second group of dword
; stores sits at offsets 15, 19, 23 and 27.
define void @memcpy_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(3) -> addrspace(5), both pointers align 8.
; Expected shape: two ds_read2_b64 (filling v[2:5] and v[6:9]), then eight
; buffer_store_dword; each group of four is emitted in descending offset
; order.
define void @memcpy_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1
; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(3) -> addrspace(5), both pointers align 16.
; Expected shape: a single ds_read_b128 (rather than the ds_read2_b64 pair
; used at lower alignments in this file's align-8 variant), then four
; buffer_store_dword in descending offset order.
define void @memcpy_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[1:4], v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(3) -> addrspace(5), both pointers align 16.
; Expected shape: two overlapping ds_read_b128 (offsets 0 and 15, i.e. bytes
; 0..15 and 15..30), so the second group of dword stores sits at offsets
; 15, 19, 23 and 27.
define void @memcpy_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(3) -> addrspace(5), both pointers align 16.
; Expected shape: two ds_read_b128 (offsets 0 and 16), then eight
; buffer_store_dword; each group of four is emitted in descending offset
; order.
define void @memcpy_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p3_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: ds_read_b128 v[2:5], v1
; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:16
; CHECK-NEXT: s_waitcnt lgkmcnt(1)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(4) -> addrspace(5), both pointers align 1.
; Expected shape: one global_load_dwordx4, then four buffer_store_dword
; emitted in the order offset 4, 0, 12, 8.
define void @memcpy_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte copy, addrspace(4) -> addrspace(5), both pointers align 1.
; Expected shape: the source is read as 16 + 12 + 2 + 1 bytes (dwordx4 at 0,
; dwordx3 at 16, ushort at 28, ubyte at 30) under one s_clause; the stores are
; seven dwords plus a short at offset 28 and a byte at offset 30.
define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte copy, addrspace(4) -> addrspace(5), both pointers align 1.
; Expected shape: two global_load_dwordx4 (offsets 0 and 16) under one
; s_clause, then eight buffer_store_dword; the lower 16 bytes are stored
; after vmcnt(1), the upper 16 bytes after vmcnt(0).
define void @memcpy_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte copy, addrspace(4) -> addrspace(5), both pointers align 2.
; Expected shape: one global_load_dwordx4, then four buffer_store_dword
; emitted in the order offset 4, 0, 12, 8.
define void @memcpy_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: the source is read as dwordx4 (offset 0), dwordx3
; (offset 16), ushort (offset 28) and ubyte (offset 30); the destination is
; written with seven buffer_store_dword plus one short and one byte store.
define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16
; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28
; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16) feeding
; eight buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: one global_load_dwordx4 feeding four
; buffer_store_dword.
define void @memcpy_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: two global_load_dwordx4 at offsets 0 and 15, i.e. two
; 16-byte pieces that overlap at byte 15, stored back as dwords at offsets
; 0..12 and 15..27.
define void @memcpy_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16) feeding
; eight buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: one global_load_dwordx4 feeding four
; buffer_store_dword.
define void @memcpy_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: two global_load_dwordx4 at offsets 0 and 15, i.e. two
; 16-byte pieces that overlap at byte 15, stored back as dwords at offsets
; 0..12 and 15..27.
define void @memcpy_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(4) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: two global_load_dwordx4 (offsets 0 and 16) feeding
; eight buffer_store_dword.
define void @memcpy_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p4_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x1
; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off
; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 1.
; Expected gfx1030 output: four buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz16_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 1.
; Expected gfx1030 output: nine buffer loads in one clause (seven dwords at
; offsets 0..24, a ushort at 28 and a ubyte at 30), each paired with a store of
; the same width to the same offset.
define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x8
; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(8)
; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 1.
; Expected gfx1030 output: eight buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_1_1:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: four buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz16_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: nine buffer loads in one clause (seven dwords at
; offsets 0..24, a ushort at 28 and a ubyte at 30), each paired with a store of
; the same width to the same offset.
define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x8
; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_waitcnt vmcnt(8)
; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 2.
; Expected gfx1030 output: eight buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_2_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: four buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz16_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: eight buffer_load_dword in one clause covering
; offsets 15..27 and 0..12 (two 16-byte groups that overlap at byte 15), each
; paired with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 8.
; Expected gfx1030 output: eight buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_8_8:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
ret void
}
; 16-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: four buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz16_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x3
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
ret void
}
; 31-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: eight buffer_load_dword in one clause covering
; offsets 15..27 and 0..12 (two 16-byte groups that overlap at byte 15), each
; paired with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
ret void
}
; 32-byte memcpy from addrspace(5) to addrspace(5); both pointers are align 16.
; Expected gfx1030 output: eight buffer_load_dword in one clause, each paired
; with a buffer_store_dword to the same offset.
define void @memcpy_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_16_16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_clause 0x7
; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_waitcnt vmcnt(7)
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT: s_waitcnt vmcnt(6)
; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT: s_waitcnt vmcnt(5)
; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT: s_waitcnt vmcnt(4)
; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT: s_waitcnt vmcnt(3)
; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
ret void
}
; Declarations of the llvm.memcpy overloads, one per destination (0, 1, 3, 5) /
; source (0, 1, 3, 4, 5) address-space pair. Each declaration refers to
; attribute group #2, which is defined after the declarations.
declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
; The group number must match the #2 used by the declarations; a definition
; under a different number leaves those references pointing at nothing.
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }