; llvm/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s

; Regression test for a bug in `DAGCombiner::replaceStoreOfInsertLoad` where
; Idx could be smaller than PtrVT, causing a MUL to be emitted with inconsistent
; LHS/RHS types.

; Loads a <4 x float> from %in, overwrites the element at constant index 3
; with %arg, and stores the whole vector back to the same address.
; The expected output below contains no vector load or vector store: the only
; memory instruction is a single dword store at byte offset 12
; (index 3 * 4-byte float).
define void @testcase_0(ptr addrspace(1) %in, float %arg) {
; CHECK-LABEL: testcase_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %loaded = load <4 x float>, ptr addrspace(1) %in
  ; Only lane 3 changes; every other lane is written back unmodified.
  %modified = insertelement <4 x float> %loaded, float %arg, i64 3
  store <4 x float> %modified, ptr addrspace(1) %in
  ret void
}

; Same load / insertelement / store-back pattern as a <6 x float> variant
; (element count is not a power of two), inserting %arg at constant index 4.
; The expected output below is a single dword store at byte offset 16
; (index 4 * 4-byte float), with no vector load or vector store.
define void @testcase_1(ptr addrspace(1) %in, float %arg) {
; CHECK-LABEL: testcase_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:16
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %loaded = load <6 x float>, ptr addrspace(1) %in
  ; Only lane 4 changes; every other lane is written back unmodified.
  %modified = insertelement <6 x float> %loaded, float %arg, i64 4
  store <6 x float> %modified, ptr addrspace(1) %in
  ret void
}

; 64-bit element variant: loads a <4 x double> from %in, replaces the element
; at constant index 1 with %arg, and stores the vector back to %in.
; The expected output below is a single two-dword (64-bit) store at byte
; offset 8 (index 1 * 8-byte double), with no vector load or vector store.
define void @testcase_2(ptr addrspace(1) %in, double %arg) {
; CHECK-LABEL: testcase_2:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %loaded = load <4 x double>, ptr addrspace(1) %in
  ; Only lane 1 changes; every other lane is written back unmodified.
  %modified = insertelement <4 x double> %loaded, double %arg, i64 1
  store <4 x double> %modified, ptr addrspace(1) %in
  ret void
}

; Widest variant in this file: loads an <8 x double> from %in, replaces the
; last element (constant index 7) with %arg, and stores the vector back.
; The expected output below is a single two-dword (64-bit) store at byte
; offset 56 (index 7 * 8-byte double), with no vector load or vector store.
define void @testcase_3(ptr addrspace(1) %in, double %arg) {
; CHECK-LABEL: testcase_3:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:56
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %loaded = load <8 x double>, ptr addrspace(1) %in
  ; Only lane 7 changes; every other lane is written back unmodified.
  %modified = insertelement <8 x double> %loaded, double %arg, i64 7
  store <8 x double> %modified, ptr addrspace(1) %in
  ret void
}