/* * Copyright 2021 Google LLC * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #ifndef skgpu_UniformManager_DEFINED #define skgpu_UniformManager_DEFINED #include "include/core/SkM44.h" #include "include/core/SkMatrix.h" #include "include/core/SkPoint.h" #include "include/core/SkPoint3.h" #include "include/core/SkRect.h" #include "include/core/SkRefCnt.h" #include "include/core/SkSize.h" #include "include/core/SkSpan.h" #include "include/private/SkColorData.h" #include "include/private/base/SkAlign.h" #include "include/private/base/SkTDArray.h" #include "src/base/SkHalf.h" #include "src/base/SkMathPriv.h" #include "src/core/SkMatrixPriv.h" #include "src/core/SkSLTypeShared.h" #include "src/gpu/graphite/ResourceTypes.h" #include "src/gpu/graphite/Uniform.h" #include <algorithm> #include <memory> namespace skgpu::graphite { class UniformDataBlock; /** * Layout::kStd140 * =============== * * From OpenGL Specification Section 7.6.2.2 "Standard Uniform Block Layout" * [https://registry.khronos.org/OpenGL/specs/gl/glspec45.core.pdf#page=159]: * 1. If the member is a scalar consuming N basic machine units, the base alignment is N. * 2. If the member is a two- or four-component vector with components consuming N basic machine * units, the base alignment is 2N or 4N, respectively. * 3. If the member is a three-component vector with components consuming N * basic machine units, the base alignment is 4N. * 4. If the member is an array of scalars or vectors, the base alignment and array * stride are set to match the base alignment of a single array element, according * to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The * array may have padding at the end; the base offset of the member following * the array is rounded up to the next multiple of the base alignment. * 5. 
If the member is a column-major matrix with C columns and R rows, the * matrix is stored identically to an array of C column vectors with R components each, * according to rule (4). * 6. If the member is an array of S column-major matrices with C columns and * R rows, the matrix is stored identically to a row of S × C column vectors * with R components each, according to rule (4). * 7. If the member is a row-major matrix with C columns and R rows, the matrix * is stored identically to an array of R row vectors with C components each, * according to rule (4). * 8. If the member is an array of S row-major matrices with C columns and R * rows, the matrix is stored identically to a row of S × R row vectors with C * components each, according to rule (4). * 9. If the member is a structure, the base alignment of the structure is N, where * N is the largest base alignment value of any of its members, and rounded * up to the base alignment of a vec4. The individual members of this sub-structure are then * assigned offsets by applying this set of rules recursively, * where the base offset of the first member of the sub-structure is equal to the * aligned offset of the structure. The structure may have padding at the end; * the base offset of the member following the sub-structure is rounded up to * the next multiple of the base alignment of the structure. * 10. If the member is an array of S structures, the S elements of the array are laid * out in order, according to rule (9). * * Layout::kStd430 * =============== * * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage * identically to uniform and shader storage blocks using the std140 layout, except that the base * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are * not rounded up to a multiple of the base alignment of a vec4. 
* * NOTE: While not explicitly stated, the layout rules for WebGPU and WGSL are identical to std430 * for SSBOs and nearly identical to std140 for UBOs. The default mat2x2 type is treated as two * float2's (not an array), so its size is 16 and alignment is 8 (vs. a size of 32 and alignment of * 16 in std140). When emitting WGSL from SkSL, prepareUniformPolyfillsForInterfaceBlock(), defined * in WGSLCodeGenerator, will modify the type declaration to match std140 exactly. This allows the * UniformManager and UniformOffsetCalculator to avoid having WebGPU-specific layout rules * (whereas SkSL::MemoryLayout has more complete rules). * * Layout::kMetal * =============== * * SkSL converts its types to the non-packed SIMD vector types in MSL. The size and alignment rules * are equivalent to std430 with the exception of half3 and float3. In std430, the size consumed * by non-array uniforms of these types is 3N while Metal consumes 4N (which is equal to the * alignment of a vec3 in both Layouts). * * Half vs. Float Uniforms * ======================= * * Regardless of the precision when the shader is executed, std140 and std430 layouts consume * "half"-based uniforms in full 32-bit precision. Metal consumes "half"-based uniforms expecting * them to have already been converted to f16. WebGPU has an extension to support f16 types, which * behave the same way as in Metal, but we do not currently utilize it. * * The rules for std430 can be easily extended to f16 by applying N = 2 instead of N = 4 for the * base primitive alignment. * * NOTE: This could also apply to the int vs. short or uint vs. ushort types, but these smaller * integer types are not supported on all platforms as uniforms. We disallow short integer uniforms * entirely, and if the data savings are required, packing should be implemented manually. * Short integer vertex attributes are supported when the vector type lets it pack into 32 bits * (e.g. int16x2 or int8x4). 
* * * Generalized Layout Rules * ======================== * * From the Layout descriptions above, the following simpler rules are sufficient: * * 1. If the base primitive type is "half" and the Layout expects half floats, N = 2; else, N = 4. * * 2. For arrays of scalars or vectors (with # of components, M = 1,2,3,4): * a. If arrays must be aligned on vec4 boundaries OR M=3, then align and stride = 4*N. * b. Otherwise, the align and stride = M*N. * * In both cases, the total size required for the uniform is "array size"*stride. * * 3. For single scalars or vectors (M = 1,2,3,4), the align is SkNextPow2(M)*N (e.g. N,2N,4N,4N). * a. If M = 3 and the Layout aligns the size with the alignment, the size is 4*N and N * padding bytes must be zeroed out afterwards. * b. Otherwise, the size = M*N (an unpadded M = 3 vector still aligns to 4*N). * * 4. The starting offset to write data is the current offset aligned to the calculated align value. * The current offset is then incremented by the total size of the uniform. * * For arrays and padded vec3's, the padding is included in the stride and total size, meeting * the requirements of the original rule 4 in std140. When a single float3 that is not padded * is written, the next offset only advances 12 bytes allowing a smaller type to pack tightly * next to the Z coordinate. * * When N = 4, the CPU and GPU primitives are compatible, regardless of being float, int, or uint. * Contiguous ranges between any padding (for alignment or for array stride) can be memcpy'ed. * When N = 2, the CPU data is float and the GPU data is f16, so values must be converted one primitive * at a time using SkFloatToHalf or skvx::to_half. * * The UniformManager will zero out any padding bytes (either prepended for starting alignment, * or appended for stride alignment). This is so that the final byte array can be hashed for uniform * value de-duplication before uploading to the GPU. 
* * While SkSL supports non-square matrices, the SkSLType enum and Graphite only expose support for * square matrices. Graphite assumes all matrix uniforms are in column-major order. This matches the * data layout of SkM44 already and UniformManager automatically transposes SkMatrix (which is in * row-major data) to be column-major. Thus, for layout purposes, a matrix or an array of matrices * can be laid out equivalently to an array of the column type with an array count multiplied by the * number of columns. * * Graphite does not embed structs within structs for its UBO or SSBO declarations for paint or * RenderSteps. However, when the "uniforms" are defined for use with SSBO random access, the * ordered set of uniforms is actually defining a struct instead of just a top-level interface. * As such, once all uniforms are recorded, the size must be rounded up to the maximum alignment * encountered for its members to satisfy alignment rules for all Layouts. * * If Graphite starts to define sub-structs, UniformOffsetCalculator can be used recursively. */ namespace LayoutRules { // The three diverging behaviors across the different Layouts: static constexpr bool PadVec3Size(Layout layout) { … } static constexpr bool AlignArraysAsVec4(Layout layout) { … } static constexpr bool UseFullPrecision(Layout layout) { … } } class UniformOffsetCalculator { … }; class UniformManager { … }; /////////////////////////////////////////////////////////////////////////////////////////////////// // Definitions // Shared helper for both write() and writeArray() template <int N, bool Half> struct LayoutTraits { … }; template<int N, bool Half> void UniformManager::write(const void* src, SkSLType type) { … } template<int N, bool Half> void UniformManager::writeArray(const void* src, int count, SkSLType type) { … } void UniformManager::alignTo(int alignment) { … } char* UniformManager::append(int alignment, int size) { … } } // namespace skgpu::graphite #endif // skgpu_UniformManager_DEFINED