// shaders.h | Explore in Territory

// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// DO NOT EDIT
// This file is automatically generated by the compile_shaders.py script

#ifndef MEDIA_GPU_CHROMEOS_SHADERS_SHADERS_H_
#define MEDIA_GPU_CHROMEOS_SHADERS_SHADERS_H_

#include <stdint.h>

namespace {

// Generated array of 32-bit words for CropRotateShader.frag, emitted by
// compile_shaders.py (presumably the compiled SPIR-V binary — confirm against
// the script). Per the GLSL reproduced in the comment below, this stage only
// samples inTexture at the interpolated texCoord and writes it to outColor.
constexpr uint32_t kCropRotateShaderFrag[] = …;
// clang-format off
/***
CropRotateShader.frag source:
// Simple fragment shader for scaling, rotate, and crop.
// All of the logic is in the corresponding vertex shader.

#version 450

layout(binding = 0) uniform sampler2D inTexture;

layout(location = 0) in vec2 texCoord;

layout(location = 0) out vec4 outColor;

void main() {
  outColor = texture(inTexture, texCoord);
}
***/
// clang-format on

// Generated array of 32-bit words for CropRotateShader.vert, emitted by
// compile_shaders.py (presumably the compiled SPIR-V binary — confirm against
// the script). Per the GLSL reproduced in the comment below, this stage takes
// each vertex position from the push-constant vertices[] array (indexed by
// gl_VertexIndex) and scales a fixed table of texture coordinates by
// visibleScale.
constexpr uint32_t kCropRotateShaderVert[] = …;
// clang-format off
/***
CropRotateShader.vert source:
// Vertex shader for scaling, rotate, and crop.

#version 450

layout(location = 0) out vec2 outTexCoord;

layout( push_constant ) uniform constants {
  vec2 vertices[6];
  vec2 visibleScale;
} pushConstants;

// Texture coordinates are always fixed.
vec2 texCoords[6] = vec2[6](
  vec2(1.0, 0.0),
  vec2(1.0, 1.0),
  vec2(0.0, 0.0),
  vec2(0.0, 0.0),
  vec2(1.0, 1.0),
  vec2(0.0, 1.0)
);

void main() {
  // Adjust the gl_Position according to scaling, rotate, and crop parameters.
  gl_Position = vec4(pushConstants.vertices[gl_VertexIndex], 0.0, 1.0);

  // We over-allocate the pivot buffer, so we need to scale texture coordinates
  // accordingly.
  outTexCoord = texCoords[gl_VertexIndex] * pushConstants.visibleScale;
}
***/
// clang-format on

// Generated array of 32-bit words for MM21Shader.frag, emitted by
// compile_shaders.py (presumably the compiled SPIR-V binary — confirm against
// the script). Per the GLSL reproduced in the comment below, this stage turns
// the interpolated intra-tile coordinates plus the per-tile x/y offsets into
// luma and chroma texel coordinates, fetches both, and applies a YUV->RGB
// matrix.
// NOTE(review): this stage reads planeStrides at push-constant offset 24,
// whereas the vertex shader declares planeStrides as the third vec2 of its
// push-constant block — confirm the host pushes the value at both locations.
constexpr uint32_t kMM21ShaderFrag[] = …;
// clang-format off
/***
MM21Shader.frag source:
// Frag shader for MM21->ARGB conversion.

#version 450
#extension GL_EXT_samplerless_texture_functions : require

precision mediump float;
precision mediump int;

layout(location = 0) in mediump vec2 intraTileX;
layout(location = 1) in mediump vec2 intraTileY;

layout(location = 2) in flat highp vec2 yOffset;
layout(location = 3) in flat highp vec2 xOffset;

layout(location = 0) out vec4 outColor;

layout( push_constant ) uniform constants {
  layout(offset = 24) vec2 planeStrides;
} pushConstants;

// Ideally we would just use a 1D texture, but Vulkan has very tight limits
// on how large those can be, so we use 2D arrays instead and reinterpret our
// address as texture coordinates.
// TODO(b/304781371): Since the dimensions of these textures are arbitrary,
// investigate if there are some dimensions which are more efficient than
// others. For example, powers of 2 might make the division faster, or a
// width of 16*32 might eliminate the need to do any division at all.
layout(binding = 0) uniform texture2D lumaTexture;
layout(binding = 1) uniform texture2D chromaTexture;

const mat3 colorConversion = mat3(1.164, 1.164, 1.164,
          0.0, -0.391, 2.018,
          1.596, -0.813, 0.0);

void main() {
  vec2 linearIdx = (floor(intraTileY) * vec2(16, 8)) +
                   floor(intraTileX) + xOffset;
  // Like in the corresponding vertex shader, we really wanted integer
  // division and modulo, but floating point is faster.
  highp vec2 detiledY = floor(linearIdx / pushConstants.planeStrides);
  highp vec2 detiledX = linearIdx - (detiledY * pushConstants.planeStrides);
  detiledY += yOffset;

  vec3 yuv;
  yuv.r = texelFetch(lumaTexture, ivec2(detiledX[0], detiledY[0]), 0).r;
  yuv.gb = texelFetch(chromaTexture, ivec2(detiledX[1], detiledY[1]), 0).rg;

  // Standard BT.601 YUV->RGB color conversion.
  yuv.r -= 16.0/255.0;
  yuv.gb -= vec2(0.5, 0.5);
  outColor = vec4(colorConversion * yuv, 1.0);
}
***/
// clang-format on

// Generated array of 32-bit words for MM21Shader.vert, emitted by
// compile_shaders.py (presumably the compiled SPIR-V binary — confirm against
// the script). Per the GLSL reproduced in the comment below, this stage derives
// a tile index and an in-tile vertex index from gl_VertexIndex (6 vertices per
// tile), writes the clip-space position, and passes the tile's base x/y offsets
// and the intra-tile coordinates on to the fragment stage.
constexpr uint32_t kMM21ShaderVert[] = …;
// clang-format off
/***
MM21Shader.vert source:
// Vertex shader for MM21->ARGB conversion.
// We produce 2 right triangles for each MM21 tile. This allows us to
// compute a few important values in the vertex shader instead of the
// fragment shader. This is desirable because the vertex shader only runs
// 6 times (once for each vertex) per tile, which means the cost is amortized
// over 16*32/6 ~= 85 pixels.

#version 450

precision highp float;
precision highp int;

// We can actually exploit the rasterizer to figure out the intra tile
// coordinates for us.
layout(location = 0) out mediump vec2 intraTileX;
layout(location = 1) out mediump vec2 intraTileY;

layout(location = 2) flat out highp vec2 yOffset;
layout(location = 3) flat out highp vec2 xOffset;

layout( push_constant ) uniform constants {
  // Vulkan push constants have interesting alignment rules, so we use a vec2
  // when we could get away with a float just to make things simple.
  vec2 tilesPerRow;
  vec2 dims;
  vec2 planeStrides;
} pushConstants;

const vec2 kLumaTileDims = vec2(16.0, 32.0);
const vec2 kTileSize = vec2(512.0, 128.0);

const vec2 intraTileCoords[6] = vec2[6](
  vec2(16.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 0.0),
  vec2(0.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 32.0)
);

void main() {
  // We really want something like:
  // int tileIdx = gl_VertexIndex / 6;
  // int tileVertIdx = gl_VertexIndex % 6;
  // But integer division and modulo are *very* expensive on mobile GPUs, so
  // we use floating point multiplication, subtraction, and flooring to
  // approximate these operations.
  // 0.1 is a fudge factor to counteract floating point rounding errors.
  // Note that we multiply this value by 6, so a larger fudge factor such as
  // 0.5 isn't appropriate because that will genuinely change the integer
  // answer (0.5 * 6 = 3 whole vertices).
  float tileIdx = floor(float(gl_VertexIndex) * (1.0 / 6.0));
  float preciseTileIdx = tileIdx;
  tileIdx += 0.1;
  uint tileVertIdx = gl_VertexIndex - uint(tileIdx * 6.0);
  vec2 tileCoords;
  tileCoords.g = floor(tileIdx / pushConstants.tilesPerRow.x);
  tileCoords.r = floor(tileIdx - (tileCoords.g * pushConstants.tilesPerRow.x));
  vec2 pos = tileCoords * kLumaTileDims + intraTileCoords[tileVertIdx];
  pos = pos * 2.0 / pushConstants.dims - vec2(1.0, 1.0);
  gl_Position = vec4(pos, 0.0, 1.0);

  // Compute the base address for the whole tile.
  vec2 linearBase = preciseTileIdx * kTileSize;
  linearBase += 0.1;
  yOffset = floor(linearBase / pushConstants.planeStrides);
  xOffset = linearBase - (yOffset * pushConstants.planeStrides);

  vec4 intraTileCoord = vec4(intraTileCoords[tileVertIdx],
  			     intraTileCoords[tileVertIdx] / 2.0);
  intraTileX = intraTileCoord.rb;
  intraTileY = intraTileCoord.ga;
}
***/
// clang-format on

// Generated array of 32-bit words for MT2TShader.frag, emitted by
// compile_shaders.py (presumably the compiled SPIR-V binary — confirm against
// the script). Per the GLSL reproduced in the comment below, this stage
// computes separate texel addresses for the MSB data and the packed LSB data of
// both planes, fetches all four, folds the LSBs into the MSB values using
// multiply/fract arithmetic instead of integer shifts, and then applies a
// YUV->RGB matrix.
constexpr uint32_t kMT2TShaderFrag[] = …;
// clang-format off
/***
MT2TShader.frag source:
// Frag shader for MT2T->AR30 conversion.

#version 450
#extension GL_EXT_samplerless_texture_functions : require

precision mediump float;
precision mediump int;

layout(location = 0) in vec2 intraTileX;
layout(location = 1) in vec2 intraTileY;

layout(location = 2) in flat highp vec2 yOffset;
layout(location = 3) in flat highp vec2 xOffset;

layout(location = 0) out vec4 outColor;

layout( push_constant ) uniform constants {
  layout(offset = 24) highp vec2 planeStrides;
} pushConstants;

layout(binding = 0) uniform texture2D lumaTexture;
layout(binding = 1) uniform texture2D chromaTexture;

const mat3 colorConversion = mat3(1.164, 1.164, 1.164,
          0.0, -0.391, 2.018,
          1.596, -0.813, 0.0);

void main() {
  vec2 blockIdx = floor(intraTileY / 4.0);
  vec2 blockRowIdx = intraTileY - (blockIdx * 4.0);

  // MSB indices need to be adjusted by how many LSB bytes are serialized
  // before the current MSB. Every 64 pixel block starts with 16 bytes of LSB
  // data.
  highp vec2 msbLinearIdx = (floor(intraTileY) * vec2(16, 8)) +
                      floor(intraTileX) + xOffset;
  msbLinearIdx += (blockIdx + 1.0) * vec2(16, 8);

  // Likewise, we need to find the address of our LSB byte. Since each LSB byte
  // encodes the LSBs for a 1x4 mini-tile, we can compute a base address using
  // blockIdx * the serialized size of one block (16 LSB + 64 MSB = 80 for
  // luma, and half that, 40, for chroma), and then offset it by the intra tile
  // X coordinate.
  highp vec2 lsbLinearIdx = blockIdx * vec2(80, 40) + xOffset;
  lsbLinearIdx += floor(intraTileX);

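  // Pack the MSB and LSB indices into one vec4 so all four are detiled
  // together. NOTE(review): this comment used to mention a 0.5 floating point
  // fudge factor, but no such offset is applied here — confirm none is needed.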
  highp vec4 linearIdx = vec4(msbLinearIdx, lsbLinearIdx);

  highp vec4 strides = vec4(pushConstants.planeStrides,
                            pushConstants.planeStrides);
  highp vec4 detiledY = floor(linearIdx / strides);
  highp vec4 detiledX = linearIdx - (detiledY * strides);
  detiledY += vec4(yOffset, yOffset);

  vec3 yuv;
  yuv.r = texelFetch(lumaTexture, ivec2(detiledX.r, detiledY.r), 0).r;
  yuv.gb = texelFetch(chromaTexture, ivec2(detiledX.g, detiledY.g), 0).rg;

  vec3 yuvLsb;
  yuvLsb.r = texelFetch(lumaTexture, ivec2(detiledX.b, detiledY.b), 0).r;
  yuvLsb.gb = texelFetch(chromaTexture, ivec2(detiledX.a, detiledY.a), 0).rg;

  // LSBs are packed into their corresponding byte with the top of the tile
  // packed into the least significant 2 bits of the byte first. So ideally,
  // we would bit shift the LSB byte based on what row within the block the
  // current pixel is in, mask all but the lower two bits, and then shift those
  // bits into where they need to go. But, because both texelFetch and outColor
  // are floating points, this requires integer conversions, which can be quite
  // slow. So instead, we use a multiplication to emulate a left shift, then
  // fract() to emulate discarding the bits that were shifted too high for
  // the register, and then division (multiplication by the reciprocal) to
  // emulate a right shift. The left shift constant should be
  // 2^(2*(3-blockRowIdx)), and the right shift constant should be 2^-8 in
  // order for this to work. The one hitch is that the floating point color
  // values range from 0.0 to 1.0, but baked into our bit shifting trick is
  // the assumption that the colors range from 0.0 to 255.0/256.0 and 0.0 to
  // 1023.0/1024.0 respectively. We can fix this by factoring into our shift
  // multiplication constants the conversion terms 255.0/256.0 and
  // 1024.0/1023.0.
  //
  // Note that the nature of floating point division means we don't emulate
  // discarding bits that are shifted too low, we just keep them as a tiny
  // fractional component. This means this approach is only approximately
  // correct. But, it is guaranteed to be correct within 1/1024, which is
  // all we need for 10-bit color accuracy.
  vec3 shift = ldexp(vec2((255.0/256.0)), ivec2(2.0 * (3.0 - blockRowIdx))).rgg;
  yuvLsb *= shift;
  yuvLsb = fract(yuvLsb);
  yuvLsb *= (1.0 / 256.0 * 1024.0 / 1023.0);
  yuv += yuvLsb;

  yuv.r -= 16.0/255.0;
  yuv.gb -= vec2(0.5, 0.5);
  outColor = vec4(colorConversion * yuv, 1.0);
}
***/
// clang-format on

// Generated array of 32-bit words for MT2TShader.vert, emitted by
// compile_shaders.py (presumably the compiled SPIR-V binary — confirm against
// the script). Per the GLSL reproduced in the comment below, this stage has the
// same structure as the MM21 vertex shader but uses a kTileSize of
// (640, 160) when computing each tile's base address.
// NOTE(review): the MM21 vertex shader adds 0.1 to linearBase before flooring;
// this shader does not — confirm the difference is intentional.
constexpr uint32_t kMT2TShaderVert[] = …;
// clang-format off
/***
MT2TShader.vert source:
// Vertex shader for MT2T->AR30 conversion.
// This shader is very similar to its MM21 counterpart, with the important
// difference being that the linear base address needs to take into account the
// packed LSB data, so we multiply it by 10/8=5/4.

#version 450

precision highp float;
precision highp int;

// We can actually exploit the rasterizer to figure out the intra tile
// coordinates for us.
layout(location = 0) out mediump vec2 intraTileX;
layout(location = 1) out mediump vec2 intraTileY;

layout(location = 2) flat out vec2 yOffset;
layout(location = 3) flat out vec2 xOffset;

layout( push_constant ) uniform constants {
  // Vulkan push constants have interesting alignment rules, so we use a vec2
  // when we could get away with a float just to make things simple.
  vec2 tilesPerRow;
  vec2 dims;
  vec2 planeStrides;
} pushConstants;

const vec2 kLumaTileDims = vec2(16.0, 32.0);
const vec2 kTileSize = vec2(640.0, 160.0);

const vec2 intraTileCoords[6] = vec2[6](
  vec2(16.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 0.0),
  vec2(0.0, 0.0),
  vec2(16.0, 32.0),
  vec2(0.0, 32.0)
);

void main() {
  // We really want something like:
  // int tileIdx = gl_VertexIndex / 6;
  // int tileVertIdx = gl_VertexIndex % 6;
  // But integer division and modulo are *very* expensive on mobile GPUs, so
  // we use floating point multiplication, subtraction, and flooring to
  // approximate these operations.
  // 0.1 is a fudge factor to counteract floating point rounding errors.
  // Note that we multiply this value by 6, so a larger fudge factor such as
  // 0.5 isn't appropriate because that will genuinely change the integer
  // answer (0.5 * 6 = 3 whole vertices).
  float tileIdx = floor(float(gl_VertexIndex) * (1.0 / 6.0));
  float preciseTileIdx = tileIdx;
  tileIdx += 0.1;
  uint tileVertIdx = gl_VertexIndex - uint(tileIdx * 6.0);
  vec2 tileCoords;
  tileCoords.g = floor(tileIdx / pushConstants.tilesPerRow.x);
  tileCoords.r = floor(tileIdx - (tileCoords.g * pushConstants.tilesPerRow.x));
  vec2 pos = tileCoords * kLumaTileDims + intraTileCoords[tileVertIdx];
  pos = pos * 2.0 / pushConstants.dims - vec2(1.0, 1.0);
  gl_Position = vec4(pos, 0.0, 1.0);

  // Compute the base address for the whole tile.
  vec2 linearBase = preciseTileIdx * kTileSize;
  yOffset = floor(linearBase / pushConstants.planeStrides);
  xOffset = linearBase - (yOffset * pushConstants.planeStrides);

  vec4 intraTileCoord = vec4(intraTileCoords[tileVertIdx],
  			     intraTileCoords[tileVertIdx] / 2.0);
  intraTileX = intraTileCoord.rb;
  intraTileY = intraTileCoord.ga;
}
***/
// clang-format on

}  // namespace

#endif
// chromium/media/gpu/chromeos/shaders/shaders.h