// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// DO NOT EDIT
// This file is automatically generated by the compile_shaders.py script

// Overview: this header declares one constexpr array of 32-bit words per
// shader stage, and after each array reproduces, inside a block comment, the
// GLSL text the array was generated from. The block comments are reference
// material only; changing them does not change the array contents.
// NOTE(review): the arrays are presumably compiled SPIR-V (the sources are
// "#version 450" Vulkan-style GLSL) -- confirm against compile_shaders.py.

#ifndef MEDIA_GPU_CHROMEOS_SHADERS_SHADERS_H_
#define MEDIA_GPU_CHROMEOS_SHADERS_SHADERS_H_

#include <stdint.h>

// NOTE(review): an unnamed namespace in a header gives every translation unit
// that includes it its own internal-linkage copy of these arrays. If that is
// not intended, the fix belongs in the generator script, not in this file.
namespace {

// Words generated from CropRotateShader.frag. Per the source below, the
// fragment stage only samples inTexture at the interpolated texture
// coordinate; the geometry work is done by the matching vertex shader.
constexpr uint32_t kCropRotateShaderFrag[] = …;
// clang-format off
/***
CropRotateShader.frag source:
// Simple fragment shader for scaling, rotate, and crop.
// All of the logic is in the corresponding vertex shader.

#version 450

layout(binding = 0) uniform sampler2D inTexture;
layout(location = 0) in vec2 texCoord;
layout(location = 0) out vec4 outColor;

void main() {
  outColor = texture(inTexture, texCoord);
}
***/
// clang-format on

// Words generated from CropRotateShader.vert. Per the source below, the six
// vertex positions and a texture-coordinate scale are read from push
// constants, while the per-vertex texture coordinates are a fixed table.
constexpr uint32_t kCropRotateShaderVert[] = …;
// clang-format off
/***
CropRotateShader.vert source:
// Vertex shader for scaling, rotate, and crop.

#version 450

layout(location = 0) out vec2 outTexCoord;

layout( push_constant ) uniform constants {
  vec2 vertices[6];
  vec2 visibleScale;
} pushConstants;

// Texture coordinates are always fixed.
vec2 texCoords[6] = vec2[6](
  vec2(1.0, 0.0),
  vec2(1.0, 1.0),
  vec2(0.0, 0.0),
  vec2(0.0, 0.0),
  vec2(1.0, 1.0),
  vec2(0.0, 1.0)
);

void main() {
  // Adjust the gl_Position according to scaling, rotate, and crop parameters.
  gl_Position = vec4(pushConstants.vertices[gl_VertexIndex], 0.0, 1.0);

  // We over-allocate the pivot buffer, so we need to scale texture coordinates
  // accordingly.
  outTexCoord = texCoords[gl_VertexIndex] * pushConstants.visibleScale;
}
***/
// clang-format on

// Words generated from MM21Shader.frag. Per the source below, the fragment
// stage turns the interpolated intra-tile coordinates plus the per-tile
// offsets from the vertex stage into texel coordinates, fetches luma and
// chroma, and applies a YUV->RGB matrix.
// NOTE(review): planeStrides is declared at push-constant offset 24, which is
// the size of the three vec2 members in the vertex-stage block. Presumably
// the host supplies a separate fragment-stage range there -- confirm.
// NOTE(review): MM21Shader.vert's comments mention "using 0.5 like we do in
// the frag shader", but this source adds no 0.5 to linearIdx; the only 0.5 is
// the chroma bias. Confirm whether that remark is stale.
constexpr uint32_t kMM21ShaderFrag[] = …;
// clang-format off
/***
MM21Shader.frag source:
// Frag shader for MM21->ARGB conversion.

#version 450
#extension GL_EXT_samplerless_texture_functions : require

precision mediump float;
precision mediump int;

layout(location = 0) in mediump vec2 intraTileX;
layout(location = 1) in mediump vec2 intraTileY;
layout(location = 2) in flat highp vec2 yOffset;
layout(location = 3) in flat highp vec2 xOffset;

layout(location = 0) out vec4 outColor;

layout( push_constant ) uniform constants {
  layout(offset = 24) vec2 planeStrides;
} pushConstants;

// Ideally we would just use a 1D texture, but Vulkan has very tight limits
// on how large those can be, so we use 2D arrays instead and reinterpret our
// address as texture coordinates.
// TODO(b/304781371): Since the dimensions of these textures are arbitrary,
// investigate if there are some dimensions which are more efficient than
// others. For example, powers of 2 might make the division faster, or a
// width of 16*32 might eliminate the need to do any division at all.
layout(binding = 0) uniform texture2D lumaTexture;
layout(binding = 1) uniform texture2D chromaTexture;

const mat3 colorConversion = mat3(1.164, 1.164, 1.164,
                                  0.0, -0.391, 2.018,
                                  1.596, -0.813, 0.0);

void main() {
  vec2 linearIdx = (floor(intraTileY) * vec2(16, 8)) + floor(intraTileX) + xOffset;

  // Like in the corresponding vertex shader, we really wanted integer
  // division and modulo, but floating point is faster.
  highp vec2 detiledY = floor(linearIdx / pushConstants.planeStrides);
  highp vec2 detiledX = linearIdx - (detiledY * pushConstants.planeStrides);
  detiledY += yOffset;

  vec3 yuv;
  yuv.r = texelFetch(lumaTexture, ivec2(detiledX[0], detiledY[0]), 0).r;
  yuv.gb = texelFetch(chromaTexture, ivec2(detiledX[1], detiledY[1]), 0).rg;

  // Standard BT.601 YUV->RGB color conversion.
  yuv.r -= 16.0/255.0;
  yuv.gb -= vec2(0.5, 0.5);
  outColor = vec4(colorConversion * yuv, 1.0);
}
***/
// clang-format on

// Words generated from MM21Shader.vert. Per the source below, every six
// consecutive vertex indices form the two triangles of one tile; the stage
// emits the clip-space position, the intra-tile coordinates to interpolate,
// and the flat per-tile x/y base offsets consumed by the fragment stage.
// NOTE(review): this source adds 0.1 to linearBase before deriving
// yOffset/xOffset, whereas the otherwise parallel MT2TShader.vert further
// down does not. Confirm the difference is intentional.
constexpr uint32_t kMM21ShaderVert[] = …;
// clang-format off
/***
MM21Shader.vert source:
// Vertex shader for MM21->ARGB conversion.
// We produce 2 right triangles for each MM21 tile. This allows us to
// compute a few important values in the vertex shader instead of the
// fragment shader. This is desirable because the vertex shader only runs
// 6 times (once for each vertex) per tile, which means the cost is amortized
// over 16*32/6 ~= 85 pixels.

#version 450

precision highp float;
precision highp int;

// We can actually exploit the rasterizer to figure out the intra tile
// coordinates for us.
layout(location = 0) out mediump vec2 intraTileX;
layout(location = 1) out mediump vec2 intraTileY;
layout(location = 2) flat out highp vec2 yOffset;
layout(location = 3) flat out highp vec2 xOffset;

layout( push_constant ) uniform constants {
  // Vulkan push constants have interesting alignment rules, so we use a vec2
  // when we could get away with a float just to make things simple.
  vec2 tilesPerRow;
  vec2 dims;
  vec2 planeStrides;
} pushConstants;

const vec2 kLumaTileDims = vec2(16.0, 32.0);
const vec2 kTileSize = vec2(512.0, 128.0);

const vec2 intraTileCoords[6] = vec2[6](
  vec2(16.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 0.0),
  vec2(0.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 32.0)
);

void main() {
  // We really want something like:
  // int tileIdx = gl_VertexIndex / 6;
  // int tileVertIdx = gl_VertexIndex % 6;
  // But integer division and modulo are *very* expensive on mobile GPUs, so
  // we use floating point multiplication, subtraction, and flooring to
  // approximate these operations.
  // 0.1 is a fudge factor to counteract floating point rounding errors.
  // Note that we multiply this value by 6, so using 0.5 like we do in the frag
  // shader isn't appropriate because that will genuinely change the integer
  // answer.
  float tileIdx = floor(float(gl_VertexIndex) * (1.0 / 6.0));
  float preciseTileIdx = tileIdx;
  tileIdx += 0.1;
  uint tileVertIdx = gl_VertexIndex - uint(tileIdx * 6.0);

  vec2 tileCoords;
  tileCoords.g = floor(tileIdx / pushConstants.tilesPerRow.x);
  tileCoords.r = floor(tileIdx - (tileCoords.g * pushConstants.tilesPerRow.x));
  vec2 pos = tileCoords * kLumaTileDims + intraTileCoords[tileVertIdx];
  pos = pos * 2.0 / pushConstants.dims - vec2(1.0, 1.0);
  gl_Position = vec4(pos, 0.0, 1.0);

  // Compute the base address for the whole tile.
  vec2 linearBase = preciseTileIdx * kTileSize;
  linearBase += 0.1;
  yOffset = floor(linearBase / pushConstants.planeStrides);
  xOffset = linearBase - (yOffset * pushConstants.planeStrides);

  vec4 intraTileCoord = vec4(intraTileCoords[tileVertIdx], intraTileCoords[tileVertIdx] / 2.0);
  intraTileX = intraTileCoord.rb;
  intraTileY = intraTileCoord.ga;
}
***/
// clang-format on

// Words generated from MT2TShader.frag. Per the source below, each output
// pixel needs four fetches: the MSB byte and the packed-LSB byte for luma and
// for chroma. The LSB bits are then merged in with a multiply / fract /
// multiply sequence instead of integer shifts.
// NOTE(review): the source contains the comment "0.5 is a floating point
// issue fudge factor", but no 0.5 is added to any linear index in the text
// shown; the only 0.5 is the chroma bias before color conversion. Confirm
// whether the comment is stale or an offset was dropped.
// NOTE(review): planeStrides sits at push-constant offset 24 here as well;
// see the matching note on kMM21ShaderFrag.
constexpr uint32_t kMT2TShaderFrag[] = …;
// clang-format off
/***
MT2TShader.frag source:
// Frag shader for MT2T->AR30 conversion.

#version 450
#extension GL_EXT_samplerless_texture_functions : require

precision mediump float;
precision mediump int;

layout(location = 0) in vec2 intraTileX;
layout(location = 1) in vec2 intraTileY;
layout(location = 2) in flat highp vec2 yOffset;
layout(location = 3) in flat highp vec2 xOffset;

layout(location = 0) out vec4 outColor;

layout( push_constant ) uniform constants {
  layout(offset = 24) highp vec2 planeStrides;
} pushConstants;

layout(binding = 0) uniform texture2D lumaTexture;
layout(binding = 1) uniform texture2D chromaTexture;

const mat3 colorConversion = mat3(1.164, 1.164, 1.164,
                                  0.0, -0.391, 2.018,
                                  1.596, -0.813, 0.0);

void main() {
  vec2 blockIdx = floor(intraTileY / 4.0);
  vec2 blockRowIdx = intraTileY - (blockIdx * 4.0);

  // MSB indices need to be adjusted by how many LSB bytes are serialized
  // before the current MSB. Every 64 pixel block starts with 16 bytes of LSB
  // data.
  highp vec2 msbLinearIdx = (floor(intraTileY) * vec2(16, 8)) + floor(intraTileX) + xOffset;
  msbLinearIdx += (blockIdx + 1.0) * vec2(16, 8);

  // Likewise, we need to find the address of our LSB byte. Since each LSB byte
  // encodes the LSBs for a 1x4 mini-tile, we can compute a base address using
  // blockIdx * 16 bytes, and then offset it by the intra tile X coordinate.
  highp vec2 lsbLinearIdx = blockIdx * vec2(80, 40) + xOffset;
  lsbLinearIdx += floor(intraTileX);

  // 0.5 is a floating point issue fudge factor.
  highp vec4 linearIdx = vec4(msbLinearIdx, lsbLinearIdx);
  highp vec4 strides = vec4(pushConstants.planeStrides, pushConstants.planeStrides);
  highp vec4 detiledY = floor(linearIdx / strides);
  highp vec4 detiledX = linearIdx - (detiledY * strides);
  detiledY += vec4(yOffset, yOffset);

  vec3 yuv;
  yuv.r = texelFetch(lumaTexture, ivec2(detiledX.r, detiledY.r), 0).r;
  yuv.gb = texelFetch(chromaTexture, ivec2(detiledX.g, detiledY.g), 0).rg;
  vec3 yuvLsb;
  yuvLsb.r = texelFetch(lumaTexture, ivec2(detiledX.b, detiledY.b), 0).r;
  yuvLsb.gb = texelFetch(chromaTexture, ivec2(detiledX.a, detiledY.a), 0).rg;

  // LSBs are packed into their corresponding byte with the top of the tile
  // packed into the least significant 2 bits of the byte first. So ideally,
  // we would bit shift the LSB byte based on what row within the block the
  // current pixel is in, mask all but the lower two bits, and then shift those
  // bits into where they need to go. But, because both texelFetch and outColor
  // are floating points, this requires integer conversions, which can be quite
  // slow. So instead, we use a multiplication to emulate a left shift, then
  // fract() to emulate discarding the bits that were shifted too high for
  // the register, and then division (multiplication by the reciprocal) to
  // emulate a right shift. The left shift constant should be
  // 2^(2*(3-blockRowIdx)), and the right shift constant should be 2^-8 in
  // order for this to work. The one hitch is that the floating point color
  // values range from 0.0 to 1.0, but baked into our bit shifting trick is
  // the assumption that the colors range from 0.0 to 255.0/256.0 and 0.0 to
  // 1023.0/1024.0 respectively. We can fix this by factoring into our shift
  // multiplication constants the conversion terms 255.0/256.0 and
  // 1024.0/1023.0.
  //
  // Note that the nature of floating point division means we don't emulate
  // discarding bits that are shifted too low, we just keep them as a tiny
  // fractional component. This means this approach is only approximately
  // correct. But, it is guaranteed to be correct within 1/1024, which is
  // all we need for 10-bit color accuracy.
  vec3 shift = ldexp(vec2((255.0/256.0)), ivec2(2.0 * (3.0 - blockRowIdx))).rgg;
  yuvLsb *= shift;
  yuvLsb = fract(yuvLsb);
  yuvLsb *= (1.0 / 256.0 * 1024.0 / 1023.0);
  yuv += yuvLsb;

  yuv.r -= 16.0/255.0;
  yuv.gb -= vec2(0.5, 0.5);
  outColor = vec4(colorConversion * yuv, 1.0);
}
***/
// clang-format on

// Words generated from MT2TShader.vert. Per the source below, this is the
// MM21 vertex stage with a larger per-tile size (640/160 instead of 512/128)
// to account for the packed LSB data.
// NOTE(review): yOffset/xOffset are declared here without an explicit
// precision qualifier, while the MT2T fragment stage declares the matching
// inputs as highp. The file-level "precision highp float" presumably makes
// these equivalent -- confirm.
constexpr uint32_t kMT2TShaderVert[] = …;
// clang-format off
/***
MT2TShader.vert source:
// Vertex shader for MT2T->AR30 conversion.
// This shader is very similar to its MM21 counterpart, with the important
// difference being that the linear base address needs to take into account the
// packed LSB data, so we multiply it by 10/8=5/4.

#version 450

precision highp float;
precision highp int;

// We can actually exploit the rasterizer to figure out the intra tile
// coordinates for us.
layout(location = 0) out mediump vec2 intraTileX;
layout(location = 1) out mediump vec2 intraTileY;
layout(location = 2) flat out vec2 yOffset;
layout(location = 3) flat out vec2 xOffset;

layout( push_constant ) uniform constants {
  // Vulkan push constants have interesting alignment rules, so we use a vec2
  // when we could get away with a float just to make things simple.
  vec2 tilesPerRow;
  vec2 dims;
  vec2 planeStrides;
} pushConstants;

const vec2 kLumaTileDims = vec2(16.0, 32.0);
const vec2 kTileSize = vec2(640.0, 160.0);

const vec2 intraTileCoords[6] = vec2[6](
  vec2(16.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 0.0),
  vec2(0.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 32.0)
);

void main() {
  // We really want something like:
  // int tileIdx = gl_VertexIndex / 6;
  // int tileVertIdx = gl_VertexIndex % 6;
  // But integer division and modulo are *very* expensive on mobile GPUs, so
  // we use floating point multiplication, subtraction, and flooring to
  // approximate these operations.
  // 0.1 is a fudge factor to counteract floating point rounding errors.
  // Note that we multiply this value by 6, so using 0.5 like we do in the frag
  // shader isn't appropriate because that will genuinely change the integer
  // answer.
  float tileIdx = floor(float(gl_VertexIndex) * (1.0 / 6.0));
  float preciseTileIdx = tileIdx;
  tileIdx += 0.1;
  uint tileVertIdx = gl_VertexIndex - uint(tileIdx * 6.0);

  vec2 tileCoords;
  tileCoords.g = floor(tileIdx / pushConstants.tilesPerRow.x);
  tileCoords.r = floor(tileIdx - (tileCoords.g * pushConstants.tilesPerRow.x));
  vec2 pos = tileCoords * kLumaTileDims + intraTileCoords[tileVertIdx];
  pos = pos * 2.0 / pushConstants.dims - vec2(1.0, 1.0);
  gl_Position = vec4(pos, 0.0, 1.0);

  // Compute the base address for the whole tile.
  vec2 linearBase = preciseTileIdx * kTileSize;
  yOffset = floor(linearBase / pushConstants.planeStrides);
  xOffset = linearBase - (yOffset * pushConstants.planeStrides);

  vec4 intraTileCoord = vec4(intraTileCoords[tileVertIdx], intraTileCoords[tileVertIdx] / 2.0);
  intraTileX = intraTileCoord.rb;
  intraTileY = intraTileCoord.ga;
}
***/
// clang-format on

}  // namespace

#endif  // MEDIA_GPU_CHROMEOS_SHADERS_SHADERS_H_