// Copyright (c) 2019 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "source/opt/amd_ext_to_khr.h" #include <set> #include <string> #include "ir_builder.h" #include "source/opt/ir_context.h" #include "spv-amd-shader-ballot.insts.inc" #include "type_manager.h" namespace spvtools { namespace opt { namespace { enum AmdShaderBallotExtOpcodes { … }; enum AmdShaderTrinaryMinMaxExtOpCodes { … }; enum AmdGcnShader { … }; analysis::Type* GetUIntType(IRContext* ctx) { … } // Returns a folding rule that replaces |op(a,b,c)| by |op(op(a,b),c)|, where // |op| is either min or max. |opcode| is the binary opcode in the GLSLstd450 // extended instruction set that corresponds to the trinary instruction being // replaced. template <GLSLstd450 opcode> bool ReplaceTrinaryMinMax(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // Returns a folding rule that replaces |mid(a,b,c)| by |clamp(a, min(b,c), // max(b,c)|. The three parameters are the opcode that correspond to the min, // max, and clamp operations for the type of the instruction being replaced. template <GLSLstd450 min_opcode, GLSLstd450 max_opcode, GLSLstd450 clamp_opcode> bool ReplaceTrinaryMid(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // Returns a folding rule that will replace the opcode with |opcode| and add // the capabilities required. 
The folding rule assumes it is folding an // OpGroup*NonUniformAMD instruction from the SPV_AMD_shader_ballot extension. template <spv::Op new_opcode> bool ReplaceGroupNonuniformOperationOpCode( IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // Returns a folding rule that will replace the SwizzleInvocationsAMD extended // instruction in the SPV_AMD_shader_ballot extension. // // The instruction // // %offset = OpConstantComposite %v4uint %x %y %z %w // %result = OpExtInst %type %1 SwizzleInvocationsAMD %data %offset // // is replaced with // // potentially new constants and types // // clang-format off // %uint_max = OpConstant %uint 0xFFFFFFFF // %v4uint = OpTypeVector %uint 4 // %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max // %null = OpConstantNull %type // clang-format on // // and the following code in the function body // // clang-format off // %id = OpLoad %uint %SubgroupLocalInvocationId // %quad_idx = OpBitwiseAnd %uint %id %uint_3 // %quad_ldr = OpBitwiseXor %uint %id %quad_idx // %my_offset = OpVectorExtractDynamic %uint %offset %quad_idx // %target_inv = OpIAdd %uint %quad_ldr %my_offset // %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv // %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv // %result = OpSelect %type %is_active %shuffle %null // clang-format on // // Also adding the capabilities and builtins that are needed. bool ReplaceSwizzleInvocations(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // Returns a folding rule that will replace the SwizzleInvocationsMaskedAMD // extended instruction in the SPV_AMD_shader_ballot extension.
// // The instruction // // %mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z // %result = OpExtInst %uint %1 SwizzleInvocationsMaskedAMD %data %mask // // is replaced with // // potentially new constants and types // // clang-format off // %uint_mask_extend = OpConstant %uint 0xFFFFFFE0 // %uint_max = OpConstant %uint 0xFFFFFFFF // %v4uint = OpTypeVector %uint 4 // %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max // clang-format on // // and the following code in the function body // // clang-format off // %id = OpLoad %uint %SubgroupLocalInvocationId // %and_mask = OpBitwiseOr %uint %uint_x %uint_mask_extend // %and = OpBitwiseAnd %uint %id %and_mask // %or = OpBitwiseOr %uint %and %uint_y // %target_inv = OpBitwiseXor %uint %or %uint_z // %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv // %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv // %result = OpSelect %type %is_active %shuffle %uint_0 // clang-format on // // Also adding the capabilities and builtins that are needed. bool ReplaceSwizzleInvocationsMasked( IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // Returns a folding rule that will replace the WriteInvocationAMD extended // instruction in the SPV_AMD_shader_ballot extension. // // The instruction // // clang-format off // %result = OpExtInst %type %1 WriteInvocationAMD %input_value %write_value %invocation_index // clang-format on // // with // // %id = OpLoad %uint %SubgroupLocalInvocationId // %cmp = OpIEqual %bool %id %invocation_index // %result = OpSelect %type %cmp %write_value %input_value // // Also adding the capabilities and builtins that are needed. bool ReplaceWriteInvocation(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // Returns a folding rule that will replace the MbcntAMD extended instruction in // the SPV_AMD_shader_ballot extension. 
// // The instruction // // %result = OpExtInst %uint %1 MbcntAMD %mask // // with // // Get SubgroupLtMask and convert the first 64-bits into a uint64_t because // AMD's shader compiler expects a 64-bit integer mask. // // %var = OpLoad %v4uint %SubgroupLtMaskKHR // %shuffle = OpVectorShuffle %v2uint %var %var 0 1 // %cast = OpBitcast %ulong %shuffle // // Perform the mask and count the bits. // // %and = OpBitwiseAnd %ulong %cast %mask // %result = OpBitCount %uint %and // // Also adding the capabilities and builtins that are needed. bool ReplaceMbcnt(IRContext* context, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // A folding rule that will replace the CubeFaceCoordAMD extended // instruction in the SPV_AMD_gcn_shader extension. Returns true if the folding is // successful. // // The instruction // // %result = OpExtInst %v2float %1 CubeFaceCoordAMD %input // // with // // %x = OpCompositeExtract %float %input 0 // %y = OpCompositeExtract %float %input 1 // %z = OpCompositeExtract %float %input 2 // %nx = OpFNegate %float %x // %ny = OpFNegate %float %y // %nz = OpFNegate %float %z // %ax = OpExtInst %float %n_1 FAbs %x // %ay = OpExtInst %float %n_1 FAbs %y // %az = OpExtInst %float %n_1 FAbs %z // %amax_x_y = OpExtInst %float %n_1 FMax %ay %ax // %amax = OpExtInst %float %n_1 FMax %az %amax_x_y // %cubema = OpFMul %float %float_2 %amax // %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y // %not_is_z_max = OpLogicalNot %bool %is_z_max // %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax // %is_y_max = OpLogicalAnd %bool %not_is_z_max %y_gt_x // %is_z_neg = OpFOrdLessThan %bool %z %float_0 // %cubesc_case_1 = OpSelect %float %is_z_neg %nx %x // %is_x_neg = OpFOrdLessThan %bool %x %float_0 // %cubesc_case_2 = OpSelect %float %is_x_neg %z %nz // %sel = OpSelect %float %is_y_max %x %cubesc_case_2 // %cubesc = OpSelect %float %is_z_max %cubesc_case_1 %sel // %is_y_neg = OpFOrdLessThan %bool %y %float_0 // %cubetc_case_1 = OpSelect %float
%is_y_neg %nz %z // %cubetc = OpSelect %float %is_y_max %cubetc_case_1 %ny // %cube = OpCompositeConstruct %v2float %cubesc %cubetc // %denom = OpCompositeConstruct %v2float %cubema %cubema // %div = OpFDiv %v2float %cube %denom // %result = OpFAdd %v2float %div %const // // Also adding the capabilities and builtins that are needed. bool ReplaceCubeFaceCoord(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // A folding rule that will replace the CubeFaceIndexAMD extended // instruction in the SPV_AMD_gcn_shader extension. Returns true if the folding // is successful. // // The instruction // // %result = OpExtInst %float %1 CubeFaceIndexAMD %input // // with // // %x = OpCompositeExtract %float %input 0 // %y = OpCompositeExtract %float %input 1 // %z = OpCompositeExtract %float %input 2 // %ax = OpExtInst %float %n_1 FAbs %x // %ay = OpExtInst %float %n_1 FAbs %y // %az = OpExtInst %float %n_1 FAbs %z // %is_z_neg = OpFOrdLessThan %bool %z %float_0 // %is_y_neg = OpFOrdLessThan %bool %y %float_0 // %is_x_neg = OpFOrdLessThan %bool %x %float_0 // %amax_x_y = OpExtInst %float %n_1 FMax %ax %ay // %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y // %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax // %case_z = OpSelect %float %is_z_neg %float_5 %float_4 // %case_y = OpSelect %float %is_y_neg %float_3 %float_2 // %case_x = OpSelect %float %is_x_neg %float_1 %float_0 // %sel = OpSelect %float %y_gt_x %case_y %case_x // %result = OpSelect %float %is_z_max %case_z %sel // // Also adding the capabilities and builtins that are needed. bool ReplaceCubeFaceIndex(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } // A folding rule that will replace the TimeAMD extended instruction in the // SPV_AMD_gcn_shader extension. It returns true if the folding is successful. // It returns false, otherwise.
// // The instruction // // %result = OpExtInst %uint64 %1 TimeAMD // // with // // %result = OpReadClockKHR %uint64 %uint_3 // // NOTE: TimeAMD uses subgroup scope (it is not a real time clock). bool ReplaceTimeAMD(IRContext* ctx, Instruction* inst, const std::vector<const analysis::Constant*>&) { … } class AmdExtFoldingRules : public FoldingRules { … }; class AmdExtConstFoldingRules : public ConstantFoldingRules { … }; } // namespace Pass::Status AmdExtensionToKhrPass::Process() { … } } // namespace opt } // namespace spvtools