; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
declare i32 @llvm.get.fpmode.i32()
; Baseline case: a callable function that returns the unmodified result of
; llvm.get.fpmode.i32. Every run line expects one s_getreg_b32 of HW_REG_MODE
; followed by a single s_and_b32 (0x7f3ff on the tahiti/hawaii/fiji runs,
; 0x87f3ff on the gfx900/gfx1030/gfx1100 runs).
define i32 @func_fpmode_i32() {
; GFX678-LABEL: func_fpmode_i32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  ret i32 %mode
}
; Same as the baseline function, but both the function and the intrinsic call
; carry the strictfp attribute. The expected instruction sequence is identical
; to the non-strictfp version.
define i32 @strictfp_func_fpmode_i32() strictfp {
; GFX678-LABEL: strictfp_func_fpmode_i32:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: strictfp_func_fpmode_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: strictfp_func_fpmode_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: strictfp_func_fpmode_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32() strictfp
  ret i32 %mode
}
; Kernel entry point: reads the FP mode and stores it through the global
; (addrspace(1)) pointer argument instead of returning it. The mode read and
; mask are the same as in the callable-function tests; only the surrounding
; argument load and store differ per target.
define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: kernel_fpmode_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GFX6-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX6-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: kernel_fpmode_i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX7-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: kernel_fpmode_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19)
; GFX8-NEXT: s_and_b32 s2, 0x7f3ff, s2
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: kernel_fpmode_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX9-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s2, 0x87f3ff, s2
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: kernel_fpmode_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_and_b32 s2, 0x87f3ff, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: kernel_fpmode_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s2, 0x87f3ff, s2
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
  %mode = call i32 @llvm.get.fpmode.i32()
  store i32 %mode, ptr addrspace(1) %ptr
  ret void
}
; TODO: We should be able to reduce the demanded bits and ask for less
; from s_getreg_b32. As the checks below record, the full-width read and the
; target-wide mask are still emitted, and the user's mask is applied by a
; second s_and_b32.
;
; Keeps bits 4-7 of the mode word (mask 240 = 0xf0).
define i32 @func_fpmode_i32_denormonly() {
; GFX678-LABEL: func_fpmode_i32_denormonly:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0xf0
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormonly:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0xf0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormonly:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xf0
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormonly:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0xf0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %denorm.bits = and i32 %mode, 240
  ret i32 %denorm.bits
}
; Keeps only the low four bits of the mode word (mask 15). The checks expect
; the usual full read and target-wide mask, then a second s_and_b32 with 15.
define i32 @func_fpmode_i32_roundonly() {
; GFX678-LABEL: func_fpmode_i32_roundonly:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 15
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_roundonly:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 15
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_roundonly:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 15
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_roundonly:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %round.bits = and i32 %mode, 15
  ret i32 %round.bits
}
; Keeps the low byte of the mode word (mask 255 = 0xff), i.e. the union of the
; masks used by the roundonly (15) and denormonly (240) tests.
define i32 @func_fpmode_i32_round_denorm_only() {
; GFX678-LABEL: func_fpmode_i32_round_denorm_only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0xff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_round_denorm_only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0xff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_round_denorm_only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_round_denorm_only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %round.denorm.bits = and i32 %mode, 255
  ret i32 %round.denorm.bits
}
; Keeps the low ten bits of the mode word (mask 1023 = 0x3ff): the low byte
; plus the two single-bit fields probed by the dx10_clamp_only (256) and
; ieee_only (512) tests.
define i32 @func_fpmode_i32_round_denorm_dx10_ieee() {
; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x3ff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x3ff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x3ff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x3ff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %low10 = and i32 %mode, 1023
  ret i32 %low10
}
; Keeps bits 12-18 of the mode word (mask 520192 = 0x7f000), the upper part of
; the 0x7f3ff mask that the gfx6/7/8 runs apply to the raw register read.
define i32 @func_fpmode_i32_excp_en() {
; GFX678-LABEL: func_fpmode_i32_excp_en:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x7f000
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_excp_en:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x7f000
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_excp_en:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x7f000
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_excp_en:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x7f000
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %excp.en = and i32 %mode, 520192
  ret i32 %excp.en
}
; Mask for all bits used on gfx6+: 521215 = 0x7f3ff, the same constant the
; gfx6/7/8 runs already apply right after the register read. The checks record
; that the user's mask is still emitted as a separate, second s_and_b32.
define i32 @func_fpmode_i32_environment_gfx6() {
; GFX678-LABEL: func_fpmode_i32_environment_gfx6:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x7f3ff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_environment_gfx6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x7f3ff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_environment_gfx6:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x7f3ff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_environment_gfx6:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x7f3ff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %env.gfx6 = and i32 %mode, 521215
  ret i32 %env.gfx6
}
; Mask for all bits used on gfx9+: 8909823 = 0x87f3ff, the same constant the
; gfx9/10/11 runs already apply right after the register read. It is a
; superset of the 0x7f3ff mask used on the gfx6/7/8 runs (adds bit 23).
define i32 @func_fpmode_i32_environment_gfx9() {
; GFX678-LABEL: func_fpmode_i32_environment_gfx9:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x87f3ff
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_environment_gfx9:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x87f3ff
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_environment_gfx9:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x87f3ff
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_environment_gfx9:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x87f3ff
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %env.gfx9 = and i32 %mode, 8909823
  ret i32 %env.gfx9
}
; Keeps bits 4-5 of the mode word (mask 48 = 0x30), the lower half of the
; 4-bit range selected by the denormonly test (240).
define i32 @func_fpmode_i32_denormf32only() {
; GFX678-LABEL: func_fpmode_i32_denormf32only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 48
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 48
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 48
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 48
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %denorm.f32 = and i32 %mode, 48
  ret i32 %denorm.f32
}
; Single-bit probes of the f32 denormal field. The whole-field test
; (func_fpmode_i32_denormf32only) masks with 48 = 16 | 32, i.e. bits 4 and 5,
; so the "_0" variant selects bit 4 (mask 16) and the "_1" variant selects
; bit 5 (mask 32). Bit 6 (mask 64) is deliberately not used here: it falls
; inside the 192 mask covered by func_fpmode_i32_denormf64f16only.
; NOTE(review): the second s_and_b32 immediates below were updated by hand to
; match the new masks; re-run utils/update_llc_test_checks.py to confirm.
define i32 @func_fpmode_i32_denormf32only_0() {
; GFX678-LABEL: func_fpmode_i32_denormf32only_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 16
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 16
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 16
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 16
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  ; Bit 4: low bit of the f32 denormal mask (48).
  %denorm.only = and i32 %fpmode, 16
  ret i32 %denorm.only
}
define i32 @func_fpmode_i32_denormf32only_1() {
; GFX678-LABEL: func_fpmode_i32_denormf32only_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 32
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 32
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 32
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 32
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  ; Bit 5: high bit of the f32 denormal mask (48).
  %denorm.only = and i32 %fpmode, 32
  ret i32 %denorm.only
}
; Keeps bits 6-7 of the mode word (mask 192 = 0xc0), the upper half of the
; 4-bit range selected by the denormonly test (240).
define i32 @func_fpmode_i32_denormf64f16only() {
; GFX678-LABEL: func_fpmode_i32_denormf64f16only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0xc0
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf64f16only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0xc0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf64f16only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0xc0
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf64f16only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0xc0
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %denorm.f64f16 = and i32 %mode, 192
  ret i32 %denorm.f64f16
}
; Keeps only bit 8 of the mode word (mask 256 = 0x100).
define i32 @func_fpmode_i32_dx10_clamp_only() {
; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x100
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x100
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x100
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x100
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %dx10.clamp = and i32 %mode, 256
  ret i32 %dx10.clamp
}
; Keeps only bit 9 of the mode word (mask 512 = 0x200).
define i32 @func_fpmode_i32_ieee_only() {
; GFX678-LABEL: func_fpmode_i32_ieee_only:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT: s_and_b32 s4, s4, 0x200
; GFX678-NEXT: v_mov_b32_e32 v0, s4
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_ieee_only:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT: s_and_b32 s4, s4, 0x200
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_ieee_only:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT: s_and_b32 s4, s4, 0x200
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_ieee_only:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT: s_and_b32 s0, s0, 0x200
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %mode = call i32 @llvm.get.fpmode.i32()
  %ieee.bit = and i32 %mode, 512
  ret i32 %ieee.bit
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX1011: {{.*}}