; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
; Scalar case: returns @llvm.abs.i16 of %arg, with the intrinsic's i1 operand
; set to false (per the LLVM LangRef this is the is_int_min_poison flag).
; What the assertions below pin down:
;   - gfx600/gfx700 runs: sign-extend with v_bfe_i32, then a 32-bit
;     v_sub_i32 / v_max_i32 pair (0 - x, max(x, -x)).
;   - gfx803/gfx900 runs: 16-bit v_sub_u16 / v_max_i16 with no extension.
;   - gfx1010 and later runs: v_sub_nc_u16 / v_max_i16; the gfx1100 and
;     gfx1200 runs additionally expect an s_delay_alu between the two.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define i16 @abs_i16(i16 %arg) {
; GFX6-LABEL: abs_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: abs_i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: abs_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0
; GFX8-NEXT: v_max_i16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: abs_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_u16_e32 v1, 0, v0
; GFX9-NEXT: v_max_i16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: abs_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u16 v1, 0, v0
; GFX10-NEXT: v_max_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: abs_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_sub_nc_u16 v1, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_i16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: abs_i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_sub_nc_u16 v1, 0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_max_i16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call i16 @llvm.abs.i16(i16 %arg, i1 false)
ret i16 %res
}
; Two-element vector case: returns @llvm.abs.v2i16 of %arg, with the
; intrinsic's i1 operand set to false (per the LLVM LangRef this is the
; is_int_min_poison flag).
; What the assertions below pin down:
;   - gfx600/gfx700 runs: each element arrives in its own VGPR (v0, v1), is
;     sign-extended with v_bfe_i32 and handled with 32-bit sub/max; the
;     sequence ends with a shift-left-by-16 and an OR into v0.
;   - gfx803 run: the high half of v0 is handled with SDWA forms
;     (src sel WORD_1), the low half with plain 16-bit ops, and the halves
;     are merged with v_or_b32.
;   - gfx900 and later runs: a single v_pk_sub_i16 / v_pk_max_i16 pair on v0.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <2 x i16> @v_abs_v2i16(<2 x i16> %arg) {
; GFX6-LABEL: v_abs_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v2i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v2, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v2
; GFX7-NEXT: v_sub_i32_e32 v2, vcc, 0, v1
; GFX7-NEXT: v_max_i32_e32 v1, v1, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0
; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v1, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v1, 0, v0
; GFX10-NEXT: v_pk_max_i16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v1, 0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_i16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v2i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v1, 0, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_pk_max_i16 v0, v0, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false)
ret <2 x i16> %res
}
; Odd-width vector case: returns @llvm.abs.v3i16 of %arg, with the
; intrinsic's i1 operand set to false (per the LLVM LangRef this is the
; is_int_min_poison flag).
; What the assertions below pin down:
;   - gfx600/gfx700 runs: the three elements (v0, v1, v2) are each
;     sign-extended with v_bfe_i32 and handled with 32-bit sub/max; the
;     sequence finishes with a shift/OR and a v_alignbit_b32.
;   - gfx803 run: SDWA forms (src sel WORD_1) for the high half of v0, plain
;     16-bit sub/max for the low halves of v0 and v1, then one v_or_b32.
;   - gfx900 and later runs: two v_pk_sub_i16 / v_pk_max_i16 pairs, one on
;     v0 and one on v1. The gfx900 run expects the pairs back to back, the
;     newer runs expect both subs before both maxes.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <3 x i16> @v_abs_v3i16(<3 x i16> %arg) {
; GFX6-LABEL: v_abs_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v1
; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v3i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
; GFX7-NEXT: v_max_i32_e32 v2, v2, v1
; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v3i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_sub_u16_e32 v3, 0, v1
; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_max_i16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_u16_e32 v3, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v3
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v2
; GFX9-NEXT: v_pk_sub_i16 v2, 0, v1
; GFX9-NEXT: v_pk_max_i16 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX10-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
; GFX10-NEXT: v_pk_max_i16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v3i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX11-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_max_i16 v0, v0, v2
; GFX11-NEXT: v_pk_max_i16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v3i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX12-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_max_i16 v0, v0, v2
; GFX12-NEXT: v_pk_max_i16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false)
ret <3 x i16> %res
}
; Four-element vector case: returns @llvm.abs.v4i16 of %arg, with the
; intrinsic's i1 operand set to false (per the LLVM LangRef this is the
; is_int_min_poison flag).
; What the assertions below pin down:
;   - gfx600/gfx700 runs: the four elements (v0..v3) are each sign-extended
;     with v_bfe_i32 and handled with 32-bit sub/max; pairs are merged with
;     shift/OR, and the sequence ends with v_alignbit_b32 into v1 and
;     v_lshrrev_b32 into v3.
;   - gfx803 run: SDWA forms (src sel WORD_1) for the high halves of v0 and
;     v1, plain 16-bit sub/max for the low halves, one v_or_b32 per register.
;   - gfx900 and later runs: two v_pk_sub_i16 / v_pk_max_i16 pairs (v0, v1).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <4 x i16> @v_abs_v4i16(<4 x i16> %arg) {
; GFX6-LABEL: v_abs_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v4
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
; GFX6-NEXT: v_max_i32_e32 v3, v3, v4
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v4i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
; GFX7-NEXT: v_max_i32_e32 v2, v2, v4
; GFX7-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
; GFX7-NEXT: v_max_i32_e32 v3, v3, v4
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: v_sub_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v4, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v5, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v3, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v5
; GFX8-NEXT: v_max_i16_e32 v1, v1, v4
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v2
; GFX9-NEXT: v_pk_sub_i16 v2, 0, v1
; GFX9-NEXT: v_pk_max_i16 v1, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX10-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX10-NEXT: v_pk_max_i16 v0, v0, v2
; GFX10-NEXT: v_pk_max_i16 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v4i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX11-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_pk_max_i16 v0, v0, v2
; GFX11-NEXT: v_pk_max_i16 v1, v1, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v4i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v2, 0, v0
; GFX12-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_pk_max_i16 v0, v0, v2
; GFX12-NEXT: v_pk_max_i16 v1, v1, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %arg, i1 false)
ret <4 x i16> %res
}
; Six-element vector case: returns @llvm.abs.v6i16 of %arg, with the
; intrinsic's i1 operand set to false (per the LLVM LangRef this is the
; is_int_min_poison flag).
; What the assertions below pin down:
;   - gfx600/gfx700 runs: the six elements (v0..v5) are each sign-extended
;     with v_bfe_i32 and handled with 32-bit sub/max; pairs are merged with
;     shift/OR, and the sequence ends with v_alignbit_b32 into v1 and
;     v_lshrrev_b32 into v3.
;   - gfx803 run: SDWA forms (src sel WORD_1) for the high halves of v0..v2,
;     plain 16-bit sub/max for the low halves, one v_or_b32 per register.
;   - gfx900 and later runs: three v_pk_sub_i16 / v_pk_max_i16 pairs
;     (v0, v1, v2).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <6 x i16> @v_abs_v6i16(<6 x i16> %arg) {
; GFX6-LABEL: v_abs_v6i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v6
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
; GFX6-NEXT: v_max_i32_e32 v3, v3, v6
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v5
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
; GFX6-NEXT: v_max_i32_e32 v5, v5, v3
; GFX6-NEXT: v_max_i32_e32 v1, v4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT: v_or_b32_e32 v4, v1, v3
; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v6i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; GFX7-NEXT: v_max_i32_e32 v2, v2, v6
; GFX7-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
; GFX7-NEXT: v_max_i32_e32 v3, v3, v6
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v5
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
; GFX7-NEXT: v_max_i32_e32 v5, v5, v3
; GFX7-NEXT: v_max_i32_e32 v1, v4, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v5
; GFX7-NEXT: v_or_b32_e32 v4, v1, v3
; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v6i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_sub_u16_sdwa v4, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v5, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v6, 0, v2
; GFX8-NEXT: v_sub_u16_e32 v7, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v8, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v4, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v8
; GFX8-NEXT: v_max_i16_e32 v1, v1, v7
; GFX8-NEXT: v_max_i16_e32 v2, v2, v6
; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v6i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v3, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v3
; GFX9-NEXT: v_pk_sub_i16 v3, 0, v1
; GFX9-NEXT: v_pk_max_i16 v1, v1, v3
; GFX9-NEXT: v_pk_sub_i16 v3, 0, v2
; GFX9-NEXT: v_pk_max_i16 v2, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v6i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v3, 0, v0
; GFX10-NEXT: v_pk_sub_i16 v4, 0, v1
; GFX10-NEXT: v_pk_sub_i16 v5, 0, v2
; GFX10-NEXT: v_pk_max_i16 v0, v0, v3
; GFX10-NEXT: v_pk_max_i16 v1, v1, v4
; GFX10-NEXT: v_pk_max_i16 v2, v2, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v6i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v3, 0, v0
; GFX11-NEXT: v_pk_sub_i16 v4, 0, v1
; GFX11-NEXT: v_pk_sub_i16 v5, 0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_pk_max_i16 v0, v0, v3
; GFX11-NEXT: v_pk_max_i16 v1, v1, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_pk_max_i16 v2, v2, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v6i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v3, 0, v0
; GFX12-NEXT: v_pk_sub_i16 v4, 0, v1
; GFX12-NEXT: v_pk_sub_i16 v5, 0, v2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_pk_max_i16 v0, v0, v3
; GFX12-NEXT: v_pk_max_i16 v1, v1, v4
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-NEXT: v_pk_max_i16 v2, v2, v5
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %arg, i1 false)
ret <6 x i16> %res
}
; Eight-element vector case: returns @llvm.abs.v8i16 of %arg, with the
; intrinsic's i1 operand set to false (per the LLVM LangRef this is the
; is_int_min_poison flag).
; What the assertions below pin down:
;   - gfx600/gfx700 runs: the eight elements (v0..v7) are each sign-extended
;     with v_bfe_i32 and handled with 32-bit sub/max; pairs are merged with
;     shift/OR, and the sequence ends with v_alignbit_b32 into v1/v5 and
;     v_lshrrev_b32 into v3/v7.
;   - gfx803 run: SDWA forms (src sel WORD_1) for the high halves of v0..v3,
;     plain 16-bit sub/max for the low halves, one v_or_b32 per register.
;   - gfx900 and later runs: four v_pk_sub_i16 / v_pk_max_i16 pairs
;     (v0..v3).
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
define <8 x i16> @v_abs_v8i16(<8 x i16> %arg) {
; GFX6-LABEL: v_abs_v8i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
; GFX6-NEXT: v_max_i32_e32 v6, v6, v8
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0, v7
; GFX6-NEXT: v_max_i32_e32 v7, v7, v8
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_or_b32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v5
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v8i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
; GFX7-NEXT: v_max_i32_e32 v6, v6, v8
; GFX7-NEXT: v_sub_i32_e32 v8, vcc, 0, v7
; GFX7-NEXT: v_max_i32_e32 v7, v7, v8
; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
; GFX7-NEXT: v_max_i32_e32 v4, v4, v7
; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
; GFX7-NEXT: v_max_i32_e32 v5, v5, v7
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GFX7-NEXT: v_max_i32_e32 v2, v2, v5
; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
; GFX7-NEXT: v_max_i32_e32 v3, v3, v5
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v8i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_sub_u16_sdwa v5, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v7, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v8, 0, v3
; GFX8-NEXT: v_sub_u16_e32 v9, 0, v2
; GFX8-NEXT: v_sub_u16_e32 v10, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v11, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v5, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v6, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v11
; GFX8-NEXT: v_max_i16_e32 v1, v1, v10
; GFX8-NEXT: v_max_i16_e32 v2, v2, v9
; GFX8-NEXT: v_max_i16_e32 v3, v3, v8
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
; GFX8-NEXT: v_or_b32_e32 v3, v3, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v8i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v4, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v4
; GFX9-NEXT: v_pk_sub_i16 v4, 0, v1
; GFX9-NEXT: v_pk_max_i16 v1, v1, v4
; GFX9-NEXT: v_pk_sub_i16 v4, 0, v2
; GFX9-NEXT: v_pk_max_i16 v2, v2, v4
; GFX9-NEXT: v_pk_sub_i16 v4, 0, v3
; GFX9-NEXT: v_pk_max_i16 v3, v3, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v8i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v4, 0, v0
; GFX10-NEXT: v_pk_sub_i16 v5, 0, v1
; GFX10-NEXT: v_pk_sub_i16 v6, 0, v2
; GFX10-NEXT: v_pk_sub_i16 v7, 0, v3
; GFX10-NEXT: v_pk_max_i16 v0, v0, v4
; GFX10-NEXT: v_pk_max_i16 v1, v1, v5
; GFX10-NEXT: v_pk_max_i16 v2, v2, v6
; GFX10-NEXT: v_pk_max_i16 v3, v3, v7
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v8i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v4, 0, v0
; GFX11-NEXT: v_pk_sub_i16 v5, 0, v1
; GFX11-NEXT: v_pk_sub_i16 v6, 0, v2
; GFX11-NEXT: v_pk_sub_i16 v7, 0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_pk_max_i16 v0, v0, v4
; GFX11-NEXT: v_pk_max_i16 v1, v1, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_pk_max_i16 v2, v2, v6
; GFX11-NEXT: v_pk_max_i16 v3, v3, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v8i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v4, 0, v0
; GFX12-NEXT: v_pk_sub_i16 v5, 0, v1
; GFX12-NEXT: v_pk_sub_i16 v6, 0, v2
; GFX12-NEXT: v_pk_sub_i16 v7, 0, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_pk_max_i16 v0, v0, v4
; GFX12-NEXT: v_pk_max_i16 v1, v1, v5
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT: v_pk_max_i16 v2, v2, v6
; GFX12-NEXT: v_pk_max_i16 v3, v3, v7
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false)
ret <8 x i16> %res
}
define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) {
; GFX6-LABEL: v_abs_v16i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v14, v14, 0, 16
; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 0, v14
; GFX6-NEXT: v_max_i32_e32 v14, v14, v16
; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 0, v15
; GFX6-NEXT: v_max_i32_e32 v15, v15, v16
; GFX6-NEXT: v_bfe_i32 v12, v12, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX6-NEXT: v_or_b32_e32 v14, v14, v15
; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
; GFX6-NEXT: v_max_i32_e32 v12, v12, v15
; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
; GFX6-NEXT: v_max_i32_e32 v13, v13, v15
; GFX6-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX6-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX6-NEXT: v_or_b32_e32 v12, v12, v13
; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
; GFX6-NEXT: v_max_i32_e32 v10, v10, v13
; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
; GFX6-NEXT: v_max_i32_e32 v11, v11, v13
; GFX6-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX6-NEXT: v_bfe_i32 v9, v9, 0, 16
; GFX6-NEXT: v_or_b32_e32 v10, v10, v11
; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
; GFX6-NEXT: v_max_i32_e32 v8, v8, v11
; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
; GFX6-NEXT: v_max_i32_e32 v9, v9, v11
; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT: v_or_b32_e32 v8, v8, v9
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
; GFX6-NEXT: v_max_i32_e32 v6, v6, v9
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
; GFX6-NEXT: v_max_i32_e32 v7, v7, v9
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_or_b32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v5
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16
; GFX6-NEXT: v_alignbit_b32 v9, v10, v8, 16
; GFX6-NEXT: v_alignbit_b32 v13, v14, v12, 16
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v11, 16, v10
; GFX6-NEXT: v_lshrrev_b32_e32 v15, 16, v14
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v16i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v14, v14, 0, 16
; GFX7-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v16, vcc, 0, v14
; GFX7-NEXT: v_max_i32_e32 v14, v14, v16
; GFX7-NEXT: v_sub_i32_e32 v16, vcc, 0, v15
; GFX7-NEXT: v_max_i32_e32 v15, v15, v16
; GFX7-NEXT: v_bfe_i32 v12, v12, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX7-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX7-NEXT: v_or_b32_e32 v14, v14, v15
; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
; GFX7-NEXT: v_max_i32_e32 v12, v12, v15
; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
; GFX7-NEXT: v_max_i32_e32 v13, v13, v15
; GFX7-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX7-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX7-NEXT: v_or_b32_e32 v12, v12, v13
; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
; GFX7-NEXT: v_max_i32_e32 v10, v10, v13
; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
; GFX7-NEXT: v_max_i32_e32 v11, v11, v13
; GFX7-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX7-NEXT: v_bfe_i32 v9, v9, 0, 16
; GFX7-NEXT: v_or_b32_e32 v10, v10, v11
; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
; GFX7-NEXT: v_max_i32_e32 v8, v8, v11
; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
; GFX7-NEXT: v_max_i32_e32 v9, v9, v11
; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-NEXT: v_or_b32_e32 v8, v8, v9
; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
; GFX7-NEXT: v_max_i32_e32 v6, v6, v9
; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
; GFX7-NEXT: v_max_i32_e32 v7, v7, v9
; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
; GFX7-NEXT: v_max_i32_e32 v4, v4, v7
; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
; GFX7-NEXT: v_max_i32_e32 v5, v5, v7
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GFX7-NEXT: v_max_i32_e32 v2, v2, v5
; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
; GFX7-NEXT: v_max_i32_e32 v3, v3, v5
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16
; GFX7-NEXT: v_alignbit_b32 v9, v10, v8, 16
; GFX7-NEXT: v_alignbit_b32 v13, v14, v12, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v10
; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v14
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v16i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v8, 0
; GFX8-NEXT: v_sub_u16_sdwa v9, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v10, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v11, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v12, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v13, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v14, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v16, 0, v7
; GFX8-NEXT: v_sub_u16_e32 v17, 0, v6
; GFX8-NEXT: v_sub_u16_e32 v18, 0, v5
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v4
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v3
; GFX8-NEXT: v_sub_u16_e32 v21, 0, v2
; GFX8-NEXT: v_sub_u16_e32 v22, 0, v1
; GFX8-NEXT: v_sub_u16_e32 v23, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v9, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v10, v6, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v11, v5, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v12, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v13, v3, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v14, v2, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v15, v1, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v8, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v23
; GFX8-NEXT: v_max_i16_e32 v1, v1, v22
; GFX8-NEXT: v_max_i16_e32 v2, v2, v21
; GFX8-NEXT: v_max_i16_e32 v3, v3, v20
; GFX8-NEXT: v_max_i16_e32 v4, v4, v19
; GFX8-NEXT: v_max_i16_e32 v5, v5, v18
; GFX8-NEXT: v_max_i16_e32 v6, v6, v17
; GFX8-NEXT: v_max_i16_e32 v7, v7, v16
; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
; GFX8-NEXT: v_or_b32_e32 v1, v1, v15
; GFX8-NEXT: v_or_b32_e32 v2, v2, v14
; GFX8-NEXT: v_or_b32_e32 v3, v3, v13
; GFX8-NEXT: v_or_b32_e32 v4, v4, v12
; GFX8-NEXT: v_or_b32_e32 v5, v5, v11
; GFX8-NEXT: v_or_b32_e32 v6, v6, v10
; GFX8-NEXT: v_or_b32_e32 v7, v7, v9
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v16i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v1
; GFX9-NEXT: v_pk_max_i16 v1, v1, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v2
; GFX9-NEXT: v_pk_max_i16 v2, v2, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v3
; GFX9-NEXT: v_pk_max_i16 v3, v3, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v4
; GFX9-NEXT: v_pk_max_i16 v4, v4, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v5
; GFX9-NEXT: v_pk_max_i16 v5, v5, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v6
; GFX9-NEXT: v_pk_max_i16 v6, v6, v8
; GFX9-NEXT: v_pk_sub_i16 v8, 0, v7
; GFX9-NEXT: v_pk_max_i16 v7, v7, v8
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v16i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v8, 0, v0
; GFX10-NEXT: v_pk_sub_i16 v9, 0, v1
; GFX10-NEXT: v_pk_sub_i16 v10, 0, v2
; GFX10-NEXT: v_pk_sub_i16 v11, 0, v6
; GFX10-NEXT: v_pk_sub_i16 v12, 0, v7
; GFX10-NEXT: v_pk_max_i16 v0, v0, v8
; GFX10-NEXT: v_pk_max_i16 v1, v1, v9
; GFX10-NEXT: v_pk_max_i16 v2, v2, v10
; GFX10-NEXT: v_pk_sub_i16 v8, 0, v3
; GFX10-NEXT: v_pk_sub_i16 v9, 0, v4
; GFX10-NEXT: v_pk_sub_i16 v10, 0, v5
; GFX10-NEXT: v_pk_max_i16 v6, v6, v11
; GFX10-NEXT: v_pk_max_i16 v7, v7, v12
; GFX10-NEXT: v_pk_max_i16 v3, v3, v8
; GFX10-NEXT: v_pk_max_i16 v4, v4, v9
; GFX10-NEXT: v_pk_max_i16 v5, v5, v10
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v16i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v8, 0, v0
; GFX11-NEXT: v_pk_sub_i16 v9, 0, v1
; GFX11-NEXT: v_pk_sub_i16 v10, 0, v2
; GFX11-NEXT: v_pk_sub_i16 v11, 0, v6
; GFX11-NEXT: v_pk_sub_i16 v12, 0, v7
; GFX11-NEXT: v_pk_max_i16 v0, v0, v8
; GFX11-NEXT: v_pk_max_i16 v1, v1, v9
; GFX11-NEXT: v_pk_max_i16 v2, v2, v10
; GFX11-NEXT: v_pk_sub_i16 v8, 0, v3
; GFX11-NEXT: v_pk_sub_i16 v9, 0, v4
; GFX11-NEXT: v_pk_sub_i16 v10, 0, v5
; GFX11-NEXT: v_pk_max_i16 v6, v6, v11
; GFX11-NEXT: v_pk_max_i16 v7, v7, v12
; GFX11-NEXT: v_pk_max_i16 v3, v3, v8
; GFX11-NEXT: v_pk_max_i16 v4, v4, v9
; GFX11-NEXT: v_pk_max_i16 v5, v5, v10
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v16i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v8, 0, v0
; GFX12-NEXT: v_pk_sub_i16 v9, 0, v1
; GFX12-NEXT: v_pk_sub_i16 v10, 0, v2
; GFX12-NEXT: v_pk_sub_i16 v11, 0, v6
; GFX12-NEXT: v_pk_sub_i16 v12, 0, v7
; GFX12-NEXT: v_pk_max_i16 v0, v0, v8
; GFX12-NEXT: v_pk_max_i16 v1, v1, v9
; GFX12-NEXT: v_pk_max_i16 v2, v2, v10
; GFX12-NEXT: v_pk_sub_i16 v8, 0, v3
; GFX12-NEXT: v_pk_sub_i16 v9, 0, v4
; GFX12-NEXT: v_pk_sub_i16 v10, 0, v5
; GFX12-NEXT: v_pk_max_i16 v6, v6, v11
; GFX12-NEXT: v_pk_max_i16 v7, v7, v12
; GFX12-NEXT: v_pk_max_i16 v3, v3, v8
; GFX12-NEXT: v_pk_max_i16 v4, v4, v9
; GFX12-NEXT: v_pk_max_i16 v5, v5, v10
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false)
ret <16 x i16> %res
}
; v_abs_v32i16: element-wise integer abs of a <32 x i16> vector through
; @llvm.abs.v32i16. The trailing `i1 false` operand is the intrinsic's
; is_int_min_poison flag (per the LLVM LangRef), so an INT_MIN element is
; returned as INT_MIN rather than being poison.
;
; What the expected output below shows for each RUN target:
;  - gfx600 / gfx700: every element is sign-extended with v_bfe_i32, abs is
;    formed as max(x, 0 - x) with 32-bit v_sub/v_max, and the results are
;    repacked with shift/or/alignbit. The code reads v0-v30 and one further
;    dword fetched by buffer_load_dword at s32 (presumably the 32nd element
;    passed on the stack - confirm against the calling convention).
;  - gfx803: the low half of each register uses v_sub_u16/v_max_i16, the high
;    half uses the SDWA forms (WORD_1 selects), and v_or_b32 merges the halves.
;  - gfx900 and newer: one v_pk_sub_i16 + v_pk_max_i16 pair per register v0-v15.
;
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
define <32 x i16> @v_abs_v32i16(<32 x i16> %arg) {
; GFX6-LABEL: v_abs_v32i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v28, v28, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v28
; GFX6-NEXT: v_bfe_i32 v29, v29, 0, 16
; GFX6-NEXT: v_max_i32_e32 v28, v28, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v29
; GFX6-NEXT: v_bfe_i32 v30, v30, 0, 16
; GFX6-NEXT: v_max_i32_e32 v29, v29, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v30
; GFX6-NEXT: v_bfe_i32 v26, v26, 0, 16
; GFX6-NEXT: v_max_i32_e32 v30, v30, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v26
; GFX6-NEXT: v_bfe_i32 v27, v27, 0, 16
; GFX6-NEXT: v_max_i32_e32 v26, v26, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v27
; GFX6-NEXT: v_bfe_i32 v24, v24, 0, 16
; GFX6-NEXT: v_max_i32_e32 v27, v27, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v24
; GFX6-NEXT: v_bfe_i32 v25, v25, 0, 16
; GFX6-NEXT: v_max_i32_e32 v24, v24, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v25
; GFX6-NEXT: v_bfe_i32 v22, v22, 0, 16
; GFX6-NEXT: v_max_i32_e32 v25, v25, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v22
; GFX6-NEXT: v_bfe_i32 v23, v23, 0, 16
; GFX6-NEXT: v_max_i32_e32 v22, v22, v31
; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v23
; GFX6-NEXT: v_max_i32_e32 v23, v23, v31
; GFX6-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX6-NEXT: v_lshlrev_b32_e32 v23, 16, v23
; GFX6-NEXT: v_lshlrev_b32_e32 v25, 16, v25
; GFX6-NEXT: v_or_b32_e32 v22, v22, v23
; GFX6-NEXT: v_or_b32_e32 v24, v24, v25
; GFX6-NEXT: v_bfe_i32 v21, v21, 0, 16
; GFX6-NEXT: v_bfe_i32 v20, v20, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v29, 16, v29
; GFX6-NEXT: v_or_b32_e32 v28, v28, v29
; GFX6-NEXT: v_sub_i32_e32 v29, vcc, 0, v20
; GFX6-NEXT: v_max_i32_e32 v20, v20, v29
; GFX6-NEXT: v_bfe_i32 v18, v18, 0, 16
; GFX6-NEXT: v_bfe_i32 v19, v19, 0, 16
; GFX6-NEXT: v_bfe_i32 v16, v16, 0, 16
; GFX6-NEXT: v_bfe_i32 v17, v17, 0, 16
; GFX6-NEXT: v_bfe_i32 v14, v14, 0, 16
; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX6-NEXT: v_bfe_i32 v12, v12, 0, 16
; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX6-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX6-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX6-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX6-NEXT: v_bfe_i32 v9, v9, 0, 16
; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v27, 16, v27
; GFX6-NEXT: v_or_b32_e32 v26, v26, v27
; GFX6-NEXT: v_lshrrev_b32_e32 v27, 16, v26
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_bfe_i32 v23, v31, 0, 16
; GFX6-NEXT: v_sub_i32_e32 v25, vcc, 0, v23
; GFX6-NEXT: v_max_i32_e32 v23, v23, v25
; GFX6-NEXT: v_lshlrev_b32_e32 v23, 16, v23
; GFX6-NEXT: v_or_b32_e32 v30, v30, v23
; GFX6-NEXT: v_sub_i32_e32 v23, vcc, 0, v21
; GFX6-NEXT: v_max_i32_e32 v21, v21, v23
; GFX6-NEXT: v_lshlrev_b32_e32 v21, 16, v21
; GFX6-NEXT: v_or_b32_e32 v20, v20, v21
; GFX6-NEXT: v_sub_i32_e32 v21, vcc, 0, v18
; GFX6-NEXT: v_max_i32_e32 v18, v18, v21
; GFX6-NEXT: v_sub_i32_e32 v21, vcc, 0, v19
; GFX6-NEXT: v_max_i32_e32 v19, v19, v21
; GFX6-NEXT: v_lshlrev_b32_e32 v19, 16, v19
; GFX6-NEXT: v_or_b32_e32 v18, v18, v19
; GFX6-NEXT: v_sub_i32_e32 v19, vcc, 0, v16
; GFX6-NEXT: v_max_i32_e32 v16, v16, v19
; GFX6-NEXT: v_sub_i32_e32 v19, vcc, 0, v17
; GFX6-NEXT: v_max_i32_e32 v17, v17, v19
; GFX6-NEXT: v_lshlrev_b32_e32 v17, 16, v17
; GFX6-NEXT: v_or_b32_e32 v16, v16, v17
; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 0, v14
; GFX6-NEXT: v_max_i32_e32 v14, v14, v17
; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 0, v15
; GFX6-NEXT: v_max_i32_e32 v15, v15, v17
; GFX6-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX6-NEXT: v_or_b32_e32 v14, v14, v15
; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
; GFX6-NEXT: v_max_i32_e32 v12, v12, v15
; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
; GFX6-NEXT: v_max_i32_e32 v13, v13, v15
; GFX6-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX6-NEXT: v_or_b32_e32 v12, v12, v13
; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
; GFX6-NEXT: v_max_i32_e32 v10, v10, v13
; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
; GFX6-NEXT: v_max_i32_e32 v11, v11, v13
; GFX6-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX6-NEXT: v_or_b32_e32 v10, v10, v11
; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
; GFX6-NEXT: v_max_i32_e32 v8, v8, v11
; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
; GFX6-NEXT: v_max_i32_e32 v9, v9, v11
; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX6-NEXT: v_or_b32_e32 v8, v8, v9
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
; GFX6-NEXT: v_max_i32_e32 v6, v6, v9
; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
; GFX6-NEXT: v_max_i32_e32 v7, v7, v9
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
; GFX6-NEXT: v_max_i32_e32 v4, v4, v7
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
; GFX6-NEXT: v_max_i32_e32 v5, v5, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_or_b32_e32 v4, v4, v5
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GFX6-NEXT: v_max_i32_e32 v2, v2, v5
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
; GFX6-NEXT: v_max_i32_e32 v3, v3, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX6-NEXT: v_max_i32_e32 v0, v0, v3
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX6-NEXT: v_max_i32_e32 v1, v1, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16
; GFX6-NEXT: v_alignbit_b32 v9, v10, v8, 16
; GFX6-NEXT: v_alignbit_b32 v13, v14, v12, 16
; GFX6-NEXT: v_alignbit_b32 v17, v18, v16, 16
; GFX6-NEXT: v_alignbit_b32 v21, v22, v20, 16
; GFX6-NEXT: v_alignbit_b32 v25, v26, v24, 16
; GFX6-NEXT: v_alignbit_b32 v29, v30, v28, 16
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v11, 16, v10
; GFX6-NEXT: v_lshrrev_b32_e32 v15, 16, v14
; GFX6-NEXT: v_lshrrev_b32_e32 v19, 16, v18
; GFX6-NEXT: v_lshrrev_b32_e32 v23, 16, v22
; GFX6-NEXT: v_lshrrev_b32_e32 v31, 16, v30
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_abs_v32i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v28, v28, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v28
; GFX7-NEXT: v_bfe_i32 v29, v29, 0, 16
; GFX7-NEXT: v_max_i32_e32 v28, v28, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v29
; GFX7-NEXT: v_bfe_i32 v30, v30, 0, 16
; GFX7-NEXT: v_max_i32_e32 v29, v29, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v30
; GFX7-NEXT: v_bfe_i32 v26, v26, 0, 16
; GFX7-NEXT: v_max_i32_e32 v30, v30, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v26
; GFX7-NEXT: v_bfe_i32 v27, v27, 0, 16
; GFX7-NEXT: v_max_i32_e32 v26, v26, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v27
; GFX7-NEXT: v_bfe_i32 v24, v24, 0, 16
; GFX7-NEXT: v_max_i32_e32 v27, v27, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v24
; GFX7-NEXT: v_bfe_i32 v25, v25, 0, 16
; GFX7-NEXT: v_max_i32_e32 v24, v24, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v25
; GFX7-NEXT: v_bfe_i32 v22, v22, 0, 16
; GFX7-NEXT: v_max_i32_e32 v25, v25, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v22
; GFX7-NEXT: v_bfe_i32 v23, v23, 0, 16
; GFX7-NEXT: v_max_i32_e32 v22, v22, v31
; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v23
; GFX7-NEXT: v_max_i32_e32 v23, v23, v31
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23
; GFX7-NEXT: v_lshlrev_b32_e32 v25, 16, v25
; GFX7-NEXT: v_or_b32_e32 v22, v22, v23
; GFX7-NEXT: v_or_b32_e32 v24, v24, v25
; GFX7-NEXT: v_bfe_i32 v21, v21, 0, 16
; GFX7-NEXT: v_bfe_i32 v20, v20, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v29, 16, v29
; GFX7-NEXT: v_or_b32_e32 v28, v28, v29
; GFX7-NEXT: v_sub_i32_e32 v29, vcc, 0, v20
; GFX7-NEXT: v_max_i32_e32 v20, v20, v29
; GFX7-NEXT: v_bfe_i32 v18, v18, 0, 16
; GFX7-NEXT: v_bfe_i32 v19, v19, 0, 16
; GFX7-NEXT: v_bfe_i32 v16, v16, 0, 16
; GFX7-NEXT: v_bfe_i32 v17, v17, 0, 16
; GFX7-NEXT: v_bfe_i32 v14, v14, 0, 16
; GFX7-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX7-NEXT: v_bfe_i32 v12, v12, 0, 16
; GFX7-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX7-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX7-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX7-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX7-NEXT: v_bfe_i32 v9, v9, 0, 16
; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-NEXT: v_lshlrev_b32_e32 v27, 16, v27
; GFX7-NEXT: v_or_b32_e32 v26, v26, v27
; GFX7-NEXT: v_lshrrev_b32_e32 v27, 16, v26
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_bfe_i32 v23, v31, 0, 16
; GFX7-NEXT: v_sub_i32_e32 v25, vcc, 0, v23
; GFX7-NEXT: v_max_i32_e32 v23, v23, v25
; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23
; GFX7-NEXT: v_or_b32_e32 v30, v30, v23
; GFX7-NEXT: v_sub_i32_e32 v23, vcc, 0, v21
; GFX7-NEXT: v_max_i32_e32 v21, v21, v23
; GFX7-NEXT: v_lshlrev_b32_e32 v21, 16, v21
; GFX7-NEXT: v_or_b32_e32 v20, v20, v21
; GFX7-NEXT: v_sub_i32_e32 v21, vcc, 0, v18
; GFX7-NEXT: v_max_i32_e32 v18, v18, v21
; GFX7-NEXT: v_sub_i32_e32 v21, vcc, 0, v19
; GFX7-NEXT: v_max_i32_e32 v19, v19, v21
; GFX7-NEXT: v_lshlrev_b32_e32 v19, 16, v19
; GFX7-NEXT: v_or_b32_e32 v18, v18, v19
; GFX7-NEXT: v_sub_i32_e32 v19, vcc, 0, v16
; GFX7-NEXT: v_max_i32_e32 v16, v16, v19
; GFX7-NEXT: v_sub_i32_e32 v19, vcc, 0, v17
; GFX7-NEXT: v_max_i32_e32 v17, v17, v19
; GFX7-NEXT: v_lshlrev_b32_e32 v17, 16, v17
; GFX7-NEXT: v_or_b32_e32 v16, v16, v17
; GFX7-NEXT: v_sub_i32_e32 v17, vcc, 0, v14
; GFX7-NEXT: v_max_i32_e32 v14, v14, v17
; GFX7-NEXT: v_sub_i32_e32 v17, vcc, 0, v15
; GFX7-NEXT: v_max_i32_e32 v15, v15, v17
; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX7-NEXT: v_or_b32_e32 v14, v14, v15
; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v12
; GFX7-NEXT: v_max_i32_e32 v12, v12, v15
; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v13
; GFX7-NEXT: v_max_i32_e32 v13, v13, v15
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX7-NEXT: v_or_b32_e32 v12, v12, v13
; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v10
; GFX7-NEXT: v_max_i32_e32 v10, v10, v13
; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v11
; GFX7-NEXT: v_max_i32_e32 v11, v11, v13
; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX7-NEXT: v_or_b32_e32 v10, v10, v11
; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v8
; GFX7-NEXT: v_max_i32_e32 v8, v8, v11
; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v9
; GFX7-NEXT: v_max_i32_e32 v9, v9, v11
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX7-NEXT: v_or_b32_e32 v8, v8, v9
; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v6
; GFX7-NEXT: v_max_i32_e32 v6, v6, v9
; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v7
; GFX7-NEXT: v_max_i32_e32 v7, v7, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4
; GFX7-NEXT: v_max_i32_e32 v4, v4, v7
; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5
; GFX7-NEXT: v_max_i32_e32 v5, v5, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
; GFX7-NEXT: v_max_i32_e32 v2, v2, v5
; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
; GFX7-NEXT: v_max_i32_e32 v3, v3, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0
; GFX7-NEXT: v_max_i32_e32 v0, v0, v3
; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
; GFX7-NEXT: v_max_i32_e32 v1, v1, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16
; GFX7-NEXT: v_alignbit_b32 v9, v10, v8, 16
; GFX7-NEXT: v_alignbit_b32 v13, v14, v12, 16
; GFX7-NEXT: v_alignbit_b32 v17, v18, v16, 16
; GFX7-NEXT: v_alignbit_b32 v21, v22, v20, 16
; GFX7-NEXT: v_alignbit_b32 v25, v26, v24, 16
; GFX7-NEXT: v_alignbit_b32 v29, v30, v28, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v10
; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v14
; GFX7-NEXT: v_lshrrev_b32_e32 v19, 16, v18
; GFX7-NEXT: v_lshrrev_b32_e32 v23, 16, v22
; GFX7-NEXT: v_lshrrev_b32_e32 v31, 16, v30
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_abs_v32i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v16, 0
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v0
; GFX8-NEXT: v_max_i16_sdwa v19, v0, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v0, v0, v20
; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v19
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v1
; GFX8-NEXT: v_max_i16_sdwa v20, v1, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v1, v1, v19
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v1, v1, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v2
; GFX8-NEXT: v_max_i16_sdwa v19, v2, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v2, v2, v20
; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v2, v2, v19
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v3
; GFX8-NEXT: v_max_i16_sdwa v20, v3, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v3, v3, v19
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v3, v3, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v4
; GFX8-NEXT: v_max_i16_sdwa v19, v4, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v4, v4, v20
; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v4, v4, v19
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v5
; GFX8-NEXT: v_max_i16_sdwa v20, v5, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v5, v5, v19
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v5, v5, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v6
; GFX8-NEXT: v_max_i16_sdwa v19, v6, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v6, v6, v20
; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v6, v6, v19
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v7
; GFX8-NEXT: v_max_i16_sdwa v20, v7, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v7, v7, v19
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v7, v7, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v8
; GFX8-NEXT: v_max_i16_sdwa v19, v8, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v8, v8, v20
; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v8, v8, v19
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v9
; GFX8-NEXT: v_max_i16_sdwa v20, v9, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v9, v9, v19
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v9, v9, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v10
; GFX8-NEXT: v_max_i16_sdwa v19, v10, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v10, v10, v20
; GFX8-NEXT: v_sub_u16_sdwa v20, v16, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v10, v10, v19
; GFX8-NEXT: v_sub_u16_e32 v19, 0, v11
; GFX8-NEXT: v_max_i16_sdwa v20, v11, v20 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v11, v11, v19
; GFX8-NEXT: v_sub_u16_sdwa v17, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_sdwa v16, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v11, v11, v20
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v12
; GFX8-NEXT: v_max_i16_sdwa v16, v12, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v12, v12, v20
; GFX8-NEXT: v_or_b32_e32 v12, v12, v16
; GFX8-NEXT: v_sub_u16_e32 v16, 0, v13
; GFX8-NEXT: v_max_i16_sdwa v19, v13, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_sub_u16_e32 v20, 0, v15
; GFX8-NEXT: v_max_i16_e32 v13, v13, v16
; GFX8-NEXT: v_sub_u16_e32 v16, 0, v14
; GFX8-NEXT: v_max_i16_sdwa v17, v15, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_sdwa v18, v14, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_max_i16_e32 v14, v14, v16
; GFX8-NEXT: v_max_i16_e32 v15, v15, v20
; GFX8-NEXT: v_or_b32_e32 v13, v13, v19
; GFX8-NEXT: v_or_b32_e32 v14, v14, v18
; GFX8-NEXT: v_or_b32_e32 v15, v15, v17
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_abs_v32i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v0
; GFX9-NEXT: v_pk_max_i16 v0, v0, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v1
; GFX9-NEXT: v_pk_max_i16 v1, v1, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v2
; GFX9-NEXT: v_pk_max_i16 v2, v2, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v3
; GFX9-NEXT: v_pk_max_i16 v3, v3, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v4
; GFX9-NEXT: v_pk_max_i16 v4, v4, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v5
; GFX9-NEXT: v_pk_max_i16 v5, v5, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v6
; GFX9-NEXT: v_pk_max_i16 v6, v6, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v7
; GFX9-NEXT: v_pk_max_i16 v7, v7, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v8
; GFX9-NEXT: v_pk_max_i16 v8, v8, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v9
; GFX9-NEXT: v_pk_max_i16 v9, v9, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v10
; GFX9-NEXT: v_pk_max_i16 v10, v10, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v11
; GFX9-NEXT: v_pk_max_i16 v11, v11, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v12
; GFX9-NEXT: v_pk_max_i16 v12, v12, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v13
; GFX9-NEXT: v_pk_max_i16 v13, v13, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v14
; GFX9-NEXT: v_pk_max_i16 v14, v14, v16
; GFX9-NEXT: v_pk_sub_i16 v16, 0, v15
; GFX9-NEXT: v_pk_max_i16 v15, v15, v16
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_abs_v32i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_sub_i16 v16, 0, v0
; GFX10-NEXT: v_pk_sub_i16 v17, 0, v2
; GFX10-NEXT: v_pk_sub_i16 v18, 0, v3
; GFX10-NEXT: v_pk_sub_i16 v19, 0, v4
; GFX10-NEXT: v_pk_sub_i16 v20, 0, v5
; GFX10-NEXT: v_pk_max_i16 v0, v0, v16
; GFX10-NEXT: v_pk_sub_i16 v16, 0, v1
; GFX10-NEXT: v_pk_max_i16 v2, v2, v17
; GFX10-NEXT: v_pk_max_i16 v3, v3, v18
; GFX10-NEXT: v_pk_max_i16 v4, v4, v19
; GFX10-NEXT: v_pk_max_i16 v5, v5, v20
; GFX10-NEXT: v_pk_max_i16 v1, v1, v16
; GFX10-NEXT: v_pk_sub_i16 v16, 0, v6
; GFX10-NEXT: v_pk_sub_i16 v17, 0, v7
; GFX10-NEXT: v_pk_sub_i16 v18, 0, v8
; GFX10-NEXT: v_pk_sub_i16 v19, 0, v9
; GFX10-NEXT: v_pk_sub_i16 v20, 0, v10
; GFX10-NEXT: v_pk_max_i16 v6, v6, v16
; GFX10-NEXT: v_pk_max_i16 v7, v7, v17
; GFX10-NEXT: v_pk_max_i16 v8, v8, v18
; GFX10-NEXT: v_pk_max_i16 v9, v9, v19
; GFX10-NEXT: v_pk_max_i16 v10, v10, v20
; GFX10-NEXT: v_pk_sub_i16 v16, 0, v11
; GFX10-NEXT: v_pk_sub_i16 v17, 0, v12
; GFX10-NEXT: v_pk_sub_i16 v18, 0, v13
; GFX10-NEXT: v_pk_sub_i16 v19, 0, v14
; GFX10-NEXT: v_pk_sub_i16 v20, 0, v15
; GFX10-NEXT: v_pk_max_i16 v11, v11, v16
; GFX10-NEXT: v_pk_max_i16 v12, v12, v17
; GFX10-NEXT: v_pk_max_i16 v13, v13, v18
; GFX10-NEXT: v_pk_max_i16 v14, v14, v19
; GFX10-NEXT: v_pk_max_i16 v15, v15, v20
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_abs_v32i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_sub_i16 v16, 0, v0
; GFX11-NEXT: v_pk_sub_i16 v17, 0, v2
; GFX11-NEXT: v_pk_sub_i16 v18, 0, v3
; GFX11-NEXT: v_pk_sub_i16 v19, 0, v4
; GFX11-NEXT: v_pk_sub_i16 v20, 0, v5
; GFX11-NEXT: v_pk_max_i16 v0, v0, v16
; GFX11-NEXT: v_pk_sub_i16 v16, 0, v1
; GFX11-NEXT: v_pk_max_i16 v2, v2, v17
; GFX11-NEXT: v_pk_max_i16 v3, v3, v18
; GFX11-NEXT: v_pk_max_i16 v4, v4, v19
; GFX11-NEXT: v_pk_max_i16 v5, v5, v20
; GFX11-NEXT: v_pk_max_i16 v1, v1, v16
; GFX11-NEXT: v_pk_sub_i16 v16, 0, v6
; GFX11-NEXT: v_pk_sub_i16 v17, 0, v7
; GFX11-NEXT: v_pk_sub_i16 v18, 0, v8
; GFX11-NEXT: v_pk_sub_i16 v19, 0, v9
; GFX11-NEXT: v_pk_sub_i16 v20, 0, v10
; GFX11-NEXT: v_pk_max_i16 v6, v6, v16
; GFX11-NEXT: v_pk_max_i16 v7, v7, v17
; GFX11-NEXT: v_pk_max_i16 v8, v8, v18
; GFX11-NEXT: v_pk_max_i16 v9, v9, v19
; GFX11-NEXT: v_pk_max_i16 v10, v10, v20
; GFX11-NEXT: v_pk_sub_i16 v16, 0, v11
; GFX11-NEXT: v_pk_sub_i16 v17, 0, v12
; GFX11-NEXT: v_pk_sub_i16 v18, 0, v13
; GFX11-NEXT: v_pk_sub_i16 v19, 0, v14
; GFX11-NEXT: v_pk_sub_i16 v20, 0, v15
; GFX11-NEXT: v_pk_max_i16 v11, v11, v16
; GFX11-NEXT: v_pk_max_i16 v12, v12, v17
; GFX11-NEXT: v_pk_max_i16 v13, v13, v18
; GFX11-NEXT: v_pk_max_i16 v14, v14, v19
; GFX11-NEXT: v_pk_max_i16 v15, v15, v20
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_abs_v32i16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_sub_i16 v16, 0, v0
; GFX12-NEXT: v_pk_sub_i16 v17, 0, v2
; GFX12-NEXT: v_pk_sub_i16 v18, 0, v3
; GFX12-NEXT: v_pk_sub_i16 v19, 0, v4
; GFX12-NEXT: v_pk_sub_i16 v20, 0, v5
; GFX12-NEXT: v_pk_max_i16 v0, v0, v16
; GFX12-NEXT: v_pk_sub_i16 v16, 0, v1
; GFX12-NEXT: v_pk_max_i16 v2, v2, v17
; GFX12-NEXT: v_pk_max_i16 v3, v3, v18
; GFX12-NEXT: v_pk_max_i16 v4, v4, v19
; GFX12-NEXT: v_pk_max_i16 v5, v5, v20
; GFX12-NEXT: v_pk_max_i16 v1, v1, v16
; GFX12-NEXT: v_pk_sub_i16 v16, 0, v6
; GFX12-NEXT: v_pk_sub_i16 v17, 0, v7
; GFX12-NEXT: v_pk_sub_i16 v18, 0, v8
; GFX12-NEXT: v_pk_sub_i16 v19, 0, v9
; GFX12-NEXT: v_pk_sub_i16 v20, 0, v10
; GFX12-NEXT: v_pk_max_i16 v6, v6, v16
; GFX12-NEXT: v_pk_max_i16 v7, v7, v17
; GFX12-NEXT: v_pk_max_i16 v8, v8, v18
; GFX12-NEXT: v_pk_max_i16 v9, v9, v19
; GFX12-NEXT: v_pk_max_i16 v10, v10, v20
; GFX12-NEXT: v_pk_sub_i16 v16, 0, v11
; GFX12-NEXT: v_pk_sub_i16 v17, 0, v12
; GFX12-NEXT: v_pk_sub_i16 v18, 0, v13
; GFX12-NEXT: v_pk_sub_i16 v19, 0, v14
; GFX12-NEXT: v_pk_sub_i16 v20, 0, v15
; GFX12-NEXT: v_pk_max_i16 v11, v11, v16
; GFX12-NEXT: v_pk_max_i16 v12, v12, v17
; GFX12-NEXT: v_pk_max_i16 v13, v13, v18
; GFX12-NEXT: v_pk_max_i16 v14, v14, v19
; GFX12-NEXT: v_pk_max_i16 v15, v15, v20
; GFX12-NEXT: s_setpc_b64 s[30:31]
%res = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %arg, i1 false)
ret <32 x i16> %res
}