; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=NO-MSAA %s
; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=NO-MSAA %s
; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1150 < %s | FileCheck -check-prefixes=MSAA %s
; Four scalar image.load.2dmsaa calls with dmask 1 that share the same s/t
; coordinates and resource and differ only in the fourth operand (fragment id
; 0, 1, 2, 3). The gfx1010 and gfx1100 runs expect the calls to be left as
; they are. The gfx1150 run expects them to be replaced by one
; image.msaa.load.2dmsaa returning <4 x float> (fourth operand 0, dmask kept
; at 1), with each original result rewritten to an extractelement whose index
; equals that call's fourth operand.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Same shape as the dmask1 test, but every scalar load uses dmask 2. The
; gfx1010 and gfx1100 runs expect no change. The gfx1150 run expects a single
; image.msaa.load.2dmsaa returning <4 x float> that keeps dmask 2, followed by
; extractelement at indices 0..3 in place of the original four results.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Same shape as the dmask1 test, but every scalar load uses dmask 4. The
; gfx1010 and gfx1100 runs expect no change. The gfx1150 run expects a single
; image.msaa.load.2dmsaa returning <4 x float> that keeps dmask 4, followed by
; extractelement at indices 0..3 in place of the original four results.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Same shape as the dmask1 test, but every scalar load uses dmask 8. The
; gfx1010 and gfx1100 runs expect no change. The gfx1150 run expects a single
; image.msaa.load.2dmsaa returning <4 x float> that keeps dmask 8, followed by
; extractelement at indices 0..3 in place of the original four results.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; The four scalar loads appear with their fourth operand (fragment id) in
; descending order: 3, 2, 1, 0. The gfx1010 and gfx1100 runs expect no change.
; The gfx1150 run expects a single image.msaa.load.2dmsaa returning
; <4 x float>, with the replacement extractelement indices following the
; original order (3, 2, 1, 0) so each result still maps to its own fragment.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Don't combine because the vaddr inputs are not identical: each of the four
; scalar loads uses its own coordinate pair (%s0/%t0 .. %s3/%t3), even though
; dmask, resource and the remaining operands match and the fourth operand
; (fragment id) runs 0..3. Every run, including gfx1150, expects the four
; image.load.2dmsaa calls to be left unchanged.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(<8 x i32> inreg %rsrc, i32 %s0, i32 %t0, i32 %s1, i32 %t1, i32 %s2, i32 %t2, i32 %s3, i32 %t3) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s0, i32 %t0, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s1, i32 %t1, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s2, i32 %t2, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s3, i32 %t3, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Eight scalar loads with identical dmask, coordinates and resource, where the
; fourth operand (fragment id) runs 0..3 twice, so each fragment is loaded by
; two separate calls. The gfx1010 and gfx1100 runs expect no change. The
; gfx1150 run expects all eight calls to be served by one
; image.msaa.load.2dmsaa returning <4 x float>, with eight extractelement
; instructions using indices 0, 1, 2, 3, 0, 1, 2, 3.
define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I5:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I7:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1
; NO-MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2
; NO-MSAA-NEXT: [[I11:%.*]] = insertvalue [8 x float] [[I10]], float [[I3]], 3
; NO-MSAA-NEXT: [[I12:%.*]] = insertvalue [8 x float] [[I11]], float [[I4]], 4
; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [8 x float] [[I12]], float [[I5]], 5
; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [8 x float] [[I13]], float [[I6]], 6
; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [8 x float] [[I14]], float [[I7]], 7
; NO-MSAA-NEXT: ret [8 x float] [[I15]]
;
; MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I6:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I7:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1
; MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2
; MSAA-NEXT: [[I11:%.*]] = insertvalue [8 x float] [[I10]], float [[I3]], 3
; MSAA-NEXT: [[I12:%.*]] = insertvalue [8 x float] [[I11]], float [[I4]], 4
; MSAA-NEXT: [[I13:%.*]] = insertvalue [8 x float] [[I12]], float [[I5]], 5
; MSAA-NEXT: [[I14:%.*]] = insertvalue [8 x float] [[I13]], float [[I6]], 6
; MSAA-NEXT: [[I15:%.*]] = insertvalue [8 x float] [[I14]], float [[I7]], 7
; MSAA-NEXT: ret [8 x float] [[I15]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i5 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i7 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i8 = insertvalue [8 x float] undef, float %i, 0
%i9 = insertvalue [8 x float] %i8, float %i1, 1
%i10 = insertvalue [8 x float] %i9, float %i2, 2
%i11 = insertvalue [8 x float] %i10, float %i3, 3
%i12 = insertvalue [8 x float] %i11, float %i4, 4
%i13 = insertvalue [8 x float] %i12, float %i5, 5
%i14 = insertvalue [8 x float] %i13, float %i6, 6
%i15 = insertvalue [8 x float] %i14, float %i7, 7
ret [8 x float] %i15
}
; The four scalar loads are not adjacent: each one is immediately followed by
; the insertvalue that consumes it. The gfx1010 and gfx1100 runs expect the
; interleaved sequence to stay as written. The gfx1150 run expects one
; image.msaa.load.2dmsaa returning <4 x float> followed by the four
; extractelement instructions (indices 0..3), all placed ahead of the
; insertvalue chain.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1
; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2
; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I6:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = insertvalue [4 x float] undef, float %i, 0
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = insertvalue [4 x float] %i1, float %i2, 1
%i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i5 = insertvalue [4 x float] %i3, float %i4, 2
%i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i7 = insertvalue [4 x float] %i5, float %i6, 3
ret [4 x float] %i7
}
; Only two scalar loads are present, with the fourth operand (fragment id) set
; to 0 and 1. The gfx1010 and gfx1100 runs expect no change. The gfx1150 run
; expects the pair to still be combined into one image.msaa.load.2dmsaa
; returning <4 x float>, with extractelement at indices 0 and 1.
define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
; NO-MSAA-NEXT: ret [2 x float] [[I3]]
;
; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
; MSAA-NEXT: ret [2 x float] [[I3]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = insertvalue [2 x float] undef, float %i, 0
%i3 = insertvalue [2 x float] %i2, float %i1, 1
ret [2 x float] %i3
}
; Only two scalar loads are present, with the fourth operand (fragment id) set
; to 2 and 3. The gfx1010 and gfx1100 runs expect no change. The gfx1150 run
; expects one image.msaa.load.2dmsaa returning <4 x float> whose own fourth
; operand is 0, with the two results taken by extractelement at indices 2
; and 3.
define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
; NO-MSAA-NEXT: ret [2 x float] [[I3]]
;
; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
; MSAA-NEXT: ret [2 x float] [[I3]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = insertvalue [2 x float] undef, float %i, 0
%i3 = insertvalue [2 x float] %i2, float %i1, 1
ret [2 x float] %i3
}
; Don't combine because it's not profitable: the resulting msaa loads would
; have 8 vdata outputs.
; The input is two <2 x float> image.load.2dmsaa calls with dmask 3 whose
; fourth operand (fragment id) is 0 and 1. Every run, including gfx1150,
; expects both calls to be left unchanged.
define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: ret [2 x <2 x float>] [[I5]]
;
; MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: ret [2 x <2 x float>] [[I5]]
;
main_body:
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [2 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [2 x <2 x float>] %i4, <2 x float> %i1, 1
ret [2 x <2 x float>] %i5
}
; Four <2 x float> 2dmsaa loads with dmask 3 (bits 0 and 1) that differ only in
; the fourth coordinate operand (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by two <4 x float>
; image.msaa.load calls, one per set dmask bit (dmask 1 and dmask 2), both with
; fourth coordinate operand 0. Element k of the first result becomes lane 0 and
; element k of the second result becomes lane 1 of the value that replaces the
; k-th original load.
define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
; Input: identical operands except the fourth coordinate operand, which runs 0..3.
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Four <2 x float> 2dmsaa loads with dmask 5 (bits 0 and 2, i.e. non-adjacent
; bits) that differ only in the fourth coordinate operand (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by two <4 x float>
; image.msaa.load calls with dmask 1 and dmask 4, both with fourth coordinate
; operand 0. Element k of the first result becomes lane 0 and element k of the
; second result becomes lane 1 of the value that replaces the k-th original
; load.
define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
; Input: identical operands except the fourth coordinate operand, which runs 0..3.
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Four <2 x float> 2dmsaa loads with dmask 6 (bits 1 and 2, so bit 0 is clear)
; that differ only in the fourth coordinate operand (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by two <4 x float>
; image.msaa.load calls with dmask 2 and dmask 4, both with fourth coordinate
; operand 0. Element k of the first result becomes lane 0 and element k of the
; second result becomes lane 1 of the value that replaces the k-th original
; load.
define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
; Input: identical operands except the fourth coordinate operand, which runs 0..3.
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Four <2 x float> 2dmsaa loads with dmask 9 (bits 0 and 3, the lowest and
; highest of the four bits) that differ only in the fourth coordinate operand
; (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by two <4 x float>
; image.msaa.load calls with dmask 1 and dmask 8, both with fourth coordinate
; operand 0. Element k of the first result becomes lane 0 and element k of the
; second result becomes lane 1 of the value that replaces the k-th original
; load.
define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
; Input: identical operands except the fourth coordinate operand, which runs 0..3.
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Four <2 x float> 2dmsaa loads with dmask 10 (bits 1 and 3) that differ only
; in the fourth coordinate operand (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by two <4 x float>
; image.msaa.load calls with dmask 2 and dmask 8, both with fourth coordinate
; operand 0. Element k of the first result becomes lane 0 and element k of the
; second result becomes lane 1 of the value that replaces the k-th original
; load.
define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
; Input: identical operands except the fourth coordinate operand, which runs 0..3.
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Four <2 x float> 2dmsaa loads with dmask 12 (bits 2 and 3, the two highest of
; the four bits) that differ only in the fourth coordinate operand (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by two <4 x float>
; image.msaa.load calls with dmask 4 and dmask 8, both with fourth coordinate
; operand 0. Element k of the first result becomes lane 0 and element k of the
; second result becomes lane 1 of the value that replaces the k-th original
; load.
define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
; Input: identical operands except the fourth coordinate operand, which runs 0..3.
%i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Half-precision variant: two scalar `half` 2dmsaa loads with dmask 1 whose
; fourth coordinate operand is 0 and 1 (the fragment ids, going by the
; function name).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: both loads are expected to be replaced by a single <4 x half>
; image.msaa.load with dmask 1 and fourth coordinate operand 0; elements 0 and
; 1 of that result replace the first and second original load respectively.
define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0
; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1
; NO-MSAA-NEXT: ret [2 x half] [[I3]]
;
; MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x half> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x half> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0
; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1
; MSAA-NEXT: ret [2 x half] [[I3]]
;
main_body:
; Input: two single-channel f16 loads differing only in the fourth coordinate operand.
%i = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = insertvalue [2 x half] undef, half %i, 0
%i3 = insertvalue [2 x half] %i2, half %i1, 1
ret [2 x half] %i3
}
; 2darraymsaa variant: four scalar float loads with dmask 1 that take an extra
; %slice coordinate and differ only in the operand following %slice (0..3).
; gfx1010/gfx1100 runs: the IR is expected to be left unchanged.
; gfx1150 run: the four loads are expected to be replaced by a single
; <4 x float> image.msaa.load.2darraymsaa with dmask 1, the same %slice, and 0
; in the operand following %slice; elements 0..3 of that result replace the
; four original loads in order.
define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
; Input: identical operands (including %slice) except the operand after %slice, which runs 0..3.
%i = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Four <2 x float> 2darraymsaa image loads with dmask 3 that share %s, %t,
; %slice and %rsrc and differ only in the constant operand that follows %slice
; (0, 1, 2, 3).
; The gfx1150 run (check prefix MSAA) expects two image.msaa.load calls, one
; with dmask 1 and one with dmask 2; element N of each <4 x float> result is
; used to rebuild the value of the N-th original load.
; The gfx1010 and gfx1100 runs (check prefix NO-MSAA) expect the four loads to
; be left unchanged.
define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
;
main_body:
%i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
%i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
%i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
%i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
ret [4 x <2 x float>] %i7
}
; Four <3 x float> 2dmsaa image loads with dmask 7 that share %s, %t and %rsrc
; and differ only in the constant operand that follows %t (0, 1, 2, 3).
; The gfx1150 run (check prefix MSAA) expects three image.msaa.load calls with
; dmask 1, 2 and 4; element N of each <4 x float> result is used to rebuild the
; <3 x float> value of the N-th original load.
; The gfx1010 and gfx1100 runs (check prefix NO-MSAA) expect the four loads to
; be left unchanged.
define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP5]], i64 1
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <3 x float> [[TMP6]], float [[TMP7]], i64 2
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <3 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = insertelement <3 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <3 x float> [[TMP11]], float [[TMP12]], i64 2
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP14:%.*]] = insertelement <3 x float> undef, float [[TMP13]], i64 0
; MSAA-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[TMP16:%.*]] = insertelement <3 x float> [[TMP14]], float [[TMP15]], i64 1
; MSAA-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP17]], i64 2
; MSAA-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP19:%.*]] = insertelement <3 x float> undef, float [[TMP18]], i64 0
; MSAA-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[TMP21:%.*]] = insertelement <3 x float> [[TMP19]], float [[TMP20]], i64 1
; MSAA-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP2]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <3 x float> [[TMP21]], float [[TMP22]], i64 2
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
;
main_body:
%i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
%i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
%i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
%i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
ret [4 x <3 x float>] %i7
}
; Same shape as the dmask 7 case with operands 0..3, except that the constant
; operand following %t takes the values 4, 5, 6 and 7.
; The gfx1150 run (check prefix MSAA) expects three image.msaa.load calls with
; dmask 1, 2 and 4 that all pass 4 in that operand position; elements 0..3 of
; each result rebuild the loads for operands 4..7 respectively.
; The gfx1010 and gfx1100 runs (check prefix NO-MSAA) expect the four loads to
; be left unchanged.
define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 5, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 6, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 7, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
; NO-MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0
; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[TMP6:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP5]], i64 1
; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; MSAA-NEXT: [[I:%.*]] = insertelement <3 x float> [[TMP6]], float [[TMP7]], i64 2
; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP9:%.*]] = insertelement <3 x float> undef, float [[TMP8]], i64 0
; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[TMP11:%.*]] = insertelement <3 x float> [[TMP9]], float [[TMP10]], i64 1
; MSAA-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
; MSAA-NEXT: [[I1:%.*]] = insertelement <3 x float> [[TMP11]], float [[TMP12]], i64 2
; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[TMP14:%.*]] = insertelement <3 x float> undef, float [[TMP13]], i64 0
; MSAA-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[TMP16:%.*]] = insertelement <3 x float> [[TMP14]], float [[TMP15]], i64 1
; MSAA-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
; MSAA-NEXT: [[I2:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP17]], i64 2
; MSAA-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[TMP19:%.*]] = insertelement <3 x float> undef, float [[TMP18]], i64 0
; MSAA-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[TMP21:%.*]] = insertelement <3 x float> [[TMP19]], float [[TMP20]], i64 1
; MSAA-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP2]], i64 3
; MSAA-NEXT: [[I3:%.*]] = insertelement <3 x float> [[TMP21]], float [[TMP22]], i64 2
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
; MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
;
main_body:
%i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 4, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 5, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 6, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 7, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
%i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
%i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
%i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
ret [4 x <3 x float>] %i7
}
; Four scalar 2dmsaa image loads (dmask 1, operands 0..3 after %t) on the same
; %rsrc, with an image store to that %rsrc placed between the second and the
; third load.
; The gfx1150 run (check prefix MSAA) expects one image.msaa.load before the
; store (elements 0 and 1 used) and a separate one after it (elements 2 and 3
; used), i.e. the loads on either side of the store are not combined into one.
; The gfx1010 and gfx1100 runs (check prefix NO-MSAA) expect the IR to be left
; unchanged.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(<8 x i32> inreg %rsrc, float %vdata, i32 %s, i32 %t, i32 %fragid) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
; The store uses the fully mangled intrinsic name so that it resolves to the
; declaration at the end of this file.
call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float %vdata, i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; The same group of four scalar 2dmsaa image loads (dmask 1, operands 0..3
; after %t, same %s, %t and %rsrc) appears in three basic blocks: main_body,
; if_equal and if_unequal. The result of the group in main_body is not used by
; the return value; the phi in merge selects between the two branch results.
; The gfx1150 run (check prefix MSAA) expects one image.msaa.load per basic
; block, i.e. three calls in total, each feeding four extractelement
; instructions.
; The gfx1010 and gfx1100 runs (check prefix NO-MSAA) expect the IR to be left
; unchanged.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %cond) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1
; NO-MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]]
; NO-MSAA: if_equal:
; NO-MSAA-NEXT: [[I9:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I10:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I11:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I12:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0
; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1
; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2
; NO-MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3
; NO-MSAA-NEXT: br label [[MERGE:%.*]]
; NO-MSAA: if_unequal:
; NO-MSAA-NEXT: [[I17:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I18:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I19:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I20:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0
; NO-MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1
; NO-MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2
; NO-MSAA-NEXT: [[I24:%.*]] = insertvalue [4 x float] [[I23]], float [[I20]], 3
; NO-MSAA-NEXT: br label [[MERGE]]
; NO-MSAA: merge:
; NO-MSAA-NEXT: [[I25:%.*]] = phi [4 x float] [ [[I16]], [[IF_EQUAL]] ], [ [[I24]], [[IF_UNEQUAL]] ]
; NO-MSAA-NEXT: ret [4 x float] [[I25]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(
; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1
; MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]]
; MSAA: if_equal:
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I9:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I11:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
; MSAA-NEXT: [[I12:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
; MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0
; MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1
; MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2
; MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3
; MSAA-NEXT: br label [[MERGE:%.*]]
; MSAA: if_unequal:
; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
; MSAA-NEXT: [[I17:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; MSAA-NEXT: [[I18:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
; MSAA-NEXT: [[I19:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
; MSAA-NEXT: [[I20:%.*]] = extractelement <4 x float> [[TMP2]], i64 3
; MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0
; MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1
; MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2
; MSAA-NEXT: [[I24:%.*]] = insertvalue [4 x float] [[I23]], float [[I20]], 3
; MSAA-NEXT: br label [[MERGE]]
; MSAA: merge:
; MSAA-NEXT: [[I25:%.*]] = phi [4 x float] [ [[I16]], [[IF_EQUAL]] ], [ [[I24]], [[IF_UNEQUAL]] ]
; MSAA-NEXT: ret [4 x float] [[I25]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
%i8 = trunc i32 %cond to i1
br i1 %i8, label %if_equal, label %if_unequal
if_equal:
%i9 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i10 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i11 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i12 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i13 = insertvalue [4 x float] undef, float %i9, 0
%i14 = insertvalue [4 x float] %i13, float %i10, 1
%i15 = insertvalue [4 x float] %i14, float %i11, 2
%i16 = insertvalue [4 x float] %i15, float %i12, 3
br label %merge
if_unequal:
%i17 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
%i18 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
%i19 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
%i20 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
%i21 = insertvalue [4 x float] undef, float %i17, 0
%i22 = insertvalue [4 x float] %i21, float %i18, 1
%i23 = insertvalue [4 x float] %i22, float %i19, 2
%i24 = insertvalue [4 x float] %i23, float %i20, 3
br label %merge
merge:
%i25 = phi [4 x float] [%i16, %if_equal], [%i24, %if_unequal]
ret [4 x float] %i25
}
; Four scalar 2dmsaa image loads (dmask 1) with the same %s and %t: two read
; %rsrc1 with operands 0 and 1 after %t, and two read %rsrc2 with operands 0
; and 1.
; The gfx1150 run (check prefix MSAA) expects one image.msaa.load per resource
; (two calls in total), with elements 0 and 1 extracted from each result.
; The gfx1010 and gfx1100 runs (check prefix NO-MSAA) expect the four loads to
; be left unchanged.
define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(<8 x i32> inreg %rsrc1, <8 x i32> inreg %rsrc2, i32 %s, i32 %t) {
; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(
; NO-MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; NO-MSAA-NEXT: main_body:
; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC1]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC2]], i32 0, i32 0)
; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; NO-MSAA-NEXT: ret [4 x float] [[I7]]
;
; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(
; MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
; MSAA-NEXT: main_body:
; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0)
; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0)
; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
; MSAA-NEXT: ret [4 x float] [[I7]]
;
main_body:
%i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc1, i32 0, i32 0)
%i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc1, i32 0, i32 0)
%i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc2, i32 0, i32 0)
%i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc2, i32 0, i32 0)
%i4 = insertvalue [4 x float] undef, float %i, 0
%i5 = insertvalue [4 x float] %i4, float %i1, 1
%i6 = insertvalue [4 x float] %i5, float %i2, 2
%i7 = insertvalue [4 x float] %i6, float %i3, 3
ret [4 x float] %i7
}
; Declarations of the image intrinsics called by the test functions in this
; file. All names carry the full type mangling, including the trailing .v8i32
; for the <8 x i32> resource operand.

; 2dmsaa loads returning float, <2 x float> and <3 x float>.
declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
; 2darraymsaa loads; these take one more i32 operand than the 2dmsaa forms.
declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
; Half-precision 2dmsaa load.
declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
; 2dmsaa store; unlike the loads it is declared without attribute group #0.
declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32)
; Attribute group applied to every load declaration above.
attributes #0 = { nounwind readonly willreturn }