chromium/third_party/spirv-tools/src/source/opt/amd_ext_to_khr.cpp

// Copyright (c) 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "source/opt/amd_ext_to_khr.h"

#include <cstdint>
#include <set>
#include <string>

#include "ir_builder.h"
#include "source/opt/ir_context.h"
#include "spv-amd-shader-ballot.insts.inc"
#include "type_manager.h"

namespace spvtools {
namespace opt {
namespace {

enum AmdShaderBallotExtOpcodes {};

enum AmdShaderTrinaryMinMaxExtOpCodes {};

enum AmdGcnShader {};

analysis::Type* GetUIntType(IRContext* ctx) {}

// A folding rule that replaces |op(a,b,c)| by |op(op(a,b),c)|, where |op| is
// either min or max. |opcode| is the binary opcode in the GLSLstd450 extended
// instruction set that corresponds to the trinary instruction being replaced.
template <GLSLstd450 opcode>
bool ReplaceTrinaryMinMax(IRContext* ctx, Instruction* inst,
                          const std::vector<const analysis::Constant*>&) {}
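
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// identity the rule relies on: a trinary max reduces to two binary maxes, and
// similarly for min. The helper name is hypothetical.
[[maybe_unused]] inline float FMax3Sketch(float a, float b, float c) {
  float max_ab = a > b ? a : b;    // FMax(a, b)
  return max_ab > c ? max_ab : c;  // FMax(FMax(a, b), c)
}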

// A folding rule that replaces |mid(a,b,c)| by |clamp(a, min(b,c), max(b,c))|.
// The three template parameters are the opcodes that correspond to the min,
// max, and clamp operations for the type of the instruction being replaced.
template <GLSLstd450 min_opcode, GLSLstd450 max_opcode, GLSLstd450 clamp_opcode>
bool ReplaceTrinaryMid(IRContext* ctx, Instruction* inst,
                       const std::vector<const analysis::Constant*>&) {}
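
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// identity behind the rewrite: mid(a,b,c) == clamp(a, min(b,c), max(b,c)).
// The helper name is hypothetical.
[[maybe_unused]] inline float FMid3Sketch(float a, float b, float c) {
  float lo = b < c ? b : c;                // FMin(b, c)
  float hi = b > c ? b : c;                // FMax(b, c)
  return a < lo ? lo : (a > hi ? hi : a);  // FClamp(a, lo, hi)
}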

// A folding rule that will replace the instruction's opcode with |new_opcode|
// and add the capabilities required.  The folding rule assumes it is folding
// an OpGroup*NonUniformAMD instruction from the SPV_AMD_shader_ballot
// extension.
template <spv::Op new_opcode>
bool ReplaceGroupNonuniformOperationOpCode(
    IRContext* ctx, Instruction* inst,
    const std::vector<const analysis::Constant*>&) {}
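
// A minimal sketch (illustrative only, assuming SetOpcode/AddCapability
// helpers behave as named) of the kind of rewrite this rule performs: only
// the opcode and a capability change, since the AMD and KHR group operations
// take their operands in the same order. The real rule may need additional
// capabilities depending on the group operation.
[[maybe_unused]] inline void SwapToNonUniformOpcodeSketch(IRContext* ctx,
                                                          Instruction* inst,
                                                          spv::Op new_opcode) {
  inst->SetOpcode(new_opcode);
  ctx->AddCapability(spv::Capability::GroupNonUniformArithmetic);
}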

// A folding rule that will replace the SwizzleInvocationsAMD extended
// instruction in the SPV_AMD_shader_ballot extension.
//
// The instruction
//
//  %offset = OpConstantComposite %v4uint %x %y %z %w
//  %result = OpExtInst %type %1 SwizzleInvocationsAMD %data %offset
//
// is replaced with
//
// potentially new constants and types
//
// clang-format off
//         %uint_max = OpConstant %uint 0xFFFFFFFF
//           %v4uint = OpTypeVector %uint 4
//     %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max
//             %null = OpConstantNull %type
// clang-format on
//
// and the following code in the function body
//
// clang-format off
//         %id = OpLoad %uint %SubgroupLocalInvocationId
//   %quad_idx = OpBitwiseAnd %uint %id %uint_3
//   %quad_ldr = OpBitwiseXor %uint %id %quad_idx
//  %my_offset = OpVectorExtractDynamic %uint %offset %quad_idx
// %target_inv = OpIAdd %uint %quad_ldr %my_offset
//  %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv
//    %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv
//     %result = OpSelect %type %is_active %shuffle %null
// clang-format on
//
// It also adds the capabilities and built-ins that are needed.
bool ReplaceSwizzleInvocations(IRContext* ctx, Instruction* inst,
                               const std::vector<const analysis::Constant*>&) {}
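
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// target-invocation arithmetic in the expansion above. |id| stands for
// SubgroupLocalInvocationId and |offset| for the four swizzle offsets; the
// helper name is hypothetical.
[[maybe_unused]] inline uint32_t SwizzleTargetInvocationSketch(
    uint32_t id, const uint32_t offset[4]) {
  uint32_t quad_idx = id & 3u;         // position within the quad of 4
  uint32_t quad_ldr = id ^ quad_idx;   // first invocation of the quad
  return quad_ldr + offset[quad_idx];  // invocation whose |data| is read
}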

// A folding rule that will replace the SwizzleInvocationsMaskedAMD extended
// instruction in the SPV_AMD_shader_ballot extension.
//
// The instruction
//
//    %mask = OpConstantComposite %v3uint %uint_x %uint_y %uint_z
//  %result = OpExtInst %uint %1 SwizzleInvocationsMaskedAMD %data %mask
//
// is replaced with
//
// potentially new constants and types
//
// clang-format off
// %uint_mask_extend = OpConstant %uint 0xFFFFFFE0
//         %uint_max = OpConstant %uint 0xFFFFFFFF
//           %v4uint = OpTypeVector %uint 4
//     %ballot_value = OpConstantComposite %v4uint %uint_max %uint_max %uint_max %uint_max
// clang-format on
//
// and the following code in the function body
//
// clang-format off
//         %id = OpLoad %uint %SubgroupLocalInvocationId
//   %and_mask = OpBitwiseOr %uint %uint_x %uint_mask_extend
//        %and = OpBitwiseAnd %uint %id %and_mask
//         %or = OpBitwiseOr %uint %and %uint_y
// %target_inv = OpBitwiseXor %uint %or %uint_z
//  %is_active = OpGroupNonUniformBallotBitExtract %bool %uint_3 %ballot_value %target_inv
//    %shuffle = OpGroupNonUniformShuffle %type %uint_3 %data %target_inv
//     %result = OpSelect %type %is_active %shuffle %uint_0
// clang-format on
//
// It also adds the capabilities and built-ins that are needed.
bool ReplaceSwizzleInvocationsMasked(
    IRContext* ctx, Instruction* inst,
    const std::vector<const analysis::Constant*>&) {}
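
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// masked-swizzle address computation in the expansion above. |x|, |y| and |z|
// are the three mask components; OR-ing |x| with 0xFFFFFFE0 forces the upper
// bits of the AND mask on, so only the low five bits of the invocation id can
// be swizzled. The helper name is hypothetical.
[[maybe_unused]] inline uint32_t MaskedSwizzleTargetInvocationSketch(
    uint32_t id, uint32_t x, uint32_t y, uint32_t z) {
  return ((id & (x | 0xFFFFFFE0u)) | y) ^ z;
}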

// A folding rule that will replace the WriteInvocationAMD extended
// instruction in the SPV_AMD_shader_ballot extension.
//
// The instruction
//
// clang-format off
//    %result = OpExtInst %type %1 WriteInvocationAMD %input_value %write_value %invocation_index
// clang-format on
//
// is replaced with
//
//     %id = OpLoad %uint %SubgroupLocalInvocationId
//    %cmp = OpIEqual %bool %id %invocation_index
// %result = OpSelect %type %cmp %write_value %input_value
//
// It also adds the capabilities and built-ins that are needed.
bool ReplaceWriteInvocation(IRContext* ctx, Instruction* inst,
                            const std::vector<const analysis::Constant*>&) {}
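
// A minimal scalar sketch (illustrative only, not used by the pass): each
// invocation keeps |input_value| unless its id equals |invocation_index|, in
// which case it takes |write_value|. The helper name is hypothetical.
[[maybe_unused]] inline uint32_t WriteInvocationSketch(
    uint32_t id, uint32_t input_value, uint32_t write_value,
    uint32_t invocation_index) {
  return id == invocation_index ? write_value : input_value;
}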

// A folding rule that will replace the MbcntAMD extended instruction in the
// SPV_AMD_shader_ballot extension.
//
// The instruction
//
//  %result = OpExtInst %uint %1 MbcntAMD %mask
//
// is replaced with
//
// Get SubgroupLtMaskKHR and bitcast its first 64 bits to a 64-bit integer,
// because the mask operand of MbcntAMD is a 64-bit integer.
//
//     %var = OpLoad %v4uint %SubgroupLtMaskKHR
// %shuffle = OpVectorShuffle %v2uint %var %var 0 1
//    %cast = OpBitcast %ulong %shuffle
//
// Perform the mask and count the bits.
//
//     %and = OpBitwiseAnd %ulong %cast %mask
//  %result = OpBitCount %uint %and
//
// It also adds the capabilities and built-ins that are needed.
bool ReplaceMbcnt(IRContext* context, Instruction* inst,
                  const std::vector<const analysis::Constant*>&) {}
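
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// bit counting done by the expansion above. |lt_mask| stands for the low 64
// bits of SubgroupLtMaskKHR; the result is the number of set bits that
// survive the caller's |mask|. The helper name is hypothetical.
[[maybe_unused]] inline uint32_t MbcntSketch(uint64_t lt_mask, uint64_t mask) {
  uint64_t bits = lt_mask & mask;  // OpBitwiseAnd
  uint32_t count = 0;
  while (bits != 0) {  // equivalent of OpBitCount
    bits &= bits - 1;  // clear the lowest set bit
    ++count;
  }
  return count;
}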

// A folding rule that will replace the CubeFaceCoordAMD extended instruction
// in the SPV_AMD_gcn_shader extension.  Returns true if the folding is
// successful.
//
// The instruction
//
//  %result = OpExtInst %v2float %1 CubeFaceCoordAMD %input
//
// is replaced with
//
//             %x = OpCompositeExtract %float %input 0
//             %y = OpCompositeExtract %float %input 1
//             %z = OpCompositeExtract %float %input 2
//            %nx = OpFNegate %float %x
//            %ny = OpFNegate %float %y
//            %nz = OpFNegate %float %z
//            %ax = OpExtInst %float %n_1 FAbs %x
//            %ay = OpExtInst %float %n_1 FAbs %y
//            %az = OpExtInst %float %n_1 FAbs %z
//      %amax_x_y = OpExtInst %float %n_1 FMax %ay %ax
//          %amax = OpExtInst %float %n_1 FMax %az %amax_x_y
//        %cubema = OpFMul %float %float_2 %amax
//      %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y
//  %not_is_z_max = OpLogicalNot %bool %is_z_max
//        %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax
//      %is_y_max = OpLogicalAnd %bool %not_is_z_max %y_gt_x
//      %is_z_neg = OpFOrdLessThan %bool %z %float_0
// %cubesc_case_1 = OpSelect %float %is_z_neg %nx %x
//      %is_x_neg = OpFOrdLessThan %bool %x %float_0
// %cubesc_case_2 = OpSelect %float %is_x_neg %z %nz
//           %sel = OpSelect %float %is_y_max %x %cubesc_case_2
//        %cubesc = OpSelect %float %is_z_max %cubesc_case_1 %sel
//      %is_y_neg = OpFOrdLessThan %bool %y %float_0
// %cubetc_case_1 = OpSelect %float %is_y_neg %nz %z
//        %cubetc = OpSelect %float %is_y_max %cubetc_case_1 %ny
//          %cube = OpCompositeConstruct %v2float %cubesc %cubetc
//         %denom = OpCompositeConstruct %v2float %cubema %cubema
//           %div = OpFDiv %v2float %cube %denom
//        %result = OpFAdd %v2float %div %const
//
// It also adds the capabilities and built-ins that are needed.
bool ReplaceCubeFaceCoord(IRContext* ctx, Instruction* inst,
                          const std::vector<const analysis::Constant*>&) {}
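
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// cube-face coordinate math encoded by the expansion above. The trailing
// "+ 0.5f" corresponds to the |%const| operand of the final OpFAdd, assuming
// the standard 0.5 recentering of cube-map face coordinates. The helper name
// is hypothetical.
[[maybe_unused]] inline void CubeFaceCoordSketch(float x, float y, float z,
                                                 float* s, float* t) {
  float ax = x < 0.0f ? -x : x;
  float ay = y < 0.0f ? -y : y;
  float az = z < 0.0f ? -z : z;
  float amax_x_y = ay > ax ? ay : ax;
  bool is_z_max = az >= amax_x_y;
  bool is_y_max = !is_z_max && ay >= ax;
  float cubema = 2.0f * (az > amax_x_y ? az : amax_x_y);  // 2*max(|x|,|y|,|z|)
  float cubesc = is_z_max ? (z < 0.0f ? -x : x)
                          : (is_y_max ? x : (x < 0.0f ? z : -z));
  float cubetc = is_y_max ? (y < 0.0f ? -z : z) : -y;
  *s = cubesc / cubema + 0.5f;
  *t = cubetc / cubema + 0.5f;
}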

// A folding rule that will replace the CubeFaceIndexAMD extended instruction
// in the SPV_AMD_gcn_shader extension.  Returns true if the folding is
// successful.
//
// The instruction
//
//  %result = OpExtInst %float %1 CubeFaceIndexAMD %input
//
// is replaced with
//
//             %x = OpCompositeExtract %float %input 0
//             %y = OpCompositeExtract %float %input 1
//             %z = OpCompositeExtract %float %input 2
//            %ax = OpExtInst %float %n_1 FAbs %x
//            %ay = OpExtInst %float %n_1 FAbs %y
//            %az = OpExtInst %float %n_1 FAbs %z
//      %is_z_neg = OpFOrdLessThan %bool %z %float_0
//      %is_y_neg = OpFOrdLessThan %bool %y %float_0
//      %is_x_neg = OpFOrdLessThan %bool %x %float_0
//      %amax_x_y = OpExtInst %float %n_1 FMax %ax %ay
//      %is_z_max = OpFOrdGreaterThanEqual %bool %az %amax_x_y
//        %y_gt_x = OpFOrdGreaterThanEqual %bool %ay %ax
//        %case_z = OpSelect %float %is_z_neg %float_5 %float_4
//        %case_y = OpSelect %float %is_y_neg %float_3 %float_2
//        %case_x = OpSelect %float %is_x_neg %float_1 %float_0
//           %sel = OpSelect %float %y_gt_x %case_y %case_x
//        %result = OpSelect %float %is_z_max %case_z %sel
//
// It also adds the capabilities and built-ins that are needed.
bool ReplaceCubeFaceIndex(IRContext* ctx, Instruction* inst,
                          const std::vector<const analysis::Constant*>&) {}
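
// A minimal scalar sketch (illustrative only, not used by the pass) of the
// face-index selection encoded by the expansion above, using the usual
// cube-map numbering: +X=0, -X=1, +Y=2, -Y=3, +Z=4, -Z=5. The helper name is
// hypothetical.
[[maybe_unused]] inline float CubeFaceIndexSketch(float x, float y, float z) {
  float ax = x < 0.0f ? -x : x;
  float ay = y < 0.0f ? -y : y;
  float az = z < 0.0f ? -z : z;
  float amax_x_y = ax > ay ? ax : ay;
  if (az >= amax_x_y) return z < 0.0f ? 5.0f : 4.0f;  // Z is the major axis
  if (ay >= ax) return y < 0.0f ? 3.0f : 2.0f;        // Y is the major axis
  return x < 0.0f ? 1.0f : 0.0f;                      // X is the major axis
}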

// A folding rule that will replace the TimeAMD extended instruction in the
// SPV_AMD_gcn_shader extension.  It returns true if the folding is successful
// and false otherwise.
//
// The instruction
//
//  %result = OpExtInst %uint64 %1 TimeAMD
//
// is replaced with
//
//  %result = OpReadClockKHR %uint64 %uint_3
//
// NOTE: TimeAMD uses subgroup scope (it is not a real time clock).
bool ReplaceTimeAMD(IRContext* ctx, Instruction* inst,
                    const std::vector<const analysis::Constant*>&) {}

class AmdExtFoldingRules : public FoldingRules {};

class AmdExtConstFoldingRules : public ConstantFoldingRules {};

}  // namespace

Pass::Status AmdExtensionToKhrPass::Process() {}

}  // namespace opt
}  // namespace spvtools