BlitTextureToBuffer.cpp | Explore in Territory

// Copyright 2023 The Dawn & Tint Authors
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
//    contributors may be used to endorse or promote products derived from
//    this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "dawn/native/BlitTextureToBuffer.h"

#include <algorithm>
#include <array>
#include <string>
#include <string_view>
#include <utility>

#include "dawn/common/Assert.h"
#include "dawn/native/BindGroup.h"
#include "dawn/native/CommandBuffer.h"
#include "dawn/native/CommandEncoder.h"
#include "dawn/native/CommandValidation.h"
#include "dawn/native/ComputePassEncoder.h"
#include "dawn/native/ComputePipeline.h"
#include "dawn/native/Device.h"
#include "dawn/native/InternalPipelineStore.h"
#include "dawn/native/PhysicalDevice.h"
#include "dawn/native/Queue.h"
#include "dawn/native/Sampler.h"
#include "dawn/native/utils/WGPUHelpers.h"

namespace dawn::native {

namespace {

// X and Y workgroup sizes.
// NOTE(review): the values and their uses are elided in this view; presumably they size the
// compute workgroup of the blit shader and the matching dispatch - confirm against the full
// source.
constexpr uint32_t kWorkgroupSizeX = …;
constexpr uint32_t kWorkgroupSizeY = …;

// NOTE(review): contents elided in this view; presumably the u32-element counterpart of
// kDstBufferF32, used when texel data has to be packed into 4-byte words - confirm.
constexpr std::string_view kDstBufferU32 = …;

// For DepthFloat32 we can directly use f32 as the buffer array data type since no packing is
// needed.
constexpr std::string_view kDstBufferF32 = …;

// One string per source texture dimension (1D, 2D, 2D array, 3D).
// NOTE(review): contents elided in this view; presumably each holds the shader-side declaration
// and texel read for a float-sampled texture of that dimension - confirm.
constexpr std::string_view kFloatTexture1D = …;

constexpr std::string_view kFloatTexture2D = …;

constexpr std::string_view kFloatTexture2DArray = …;

constexpr std::string_view kFloatTexture3D = …;

// Cube map reference: https://en.wikipedia.org/wiki/Cube_mapping
// Function converting a texel coord to a sample st coord for cube textures.
constexpr std::string_view kCubeCoordCommon = …;

// NOTE(review): contents elided in this view; presumably the cube counterpart of the
// kFloatTexture* strings, relying on the function from kCubeCoordCommon - confirm.
constexpr std::string_view kFloatTextureCube = …;

// NOTE(review): contents elided in this view; presumably the unsigned-integer texture variants
// (non-array and array) of the texture strings - confirm.
constexpr std::string_view kUintTexture = …;

constexpr std::string_view kUintTextureArray = …;

// textureSampleLevel doesn't support texture_cube<u32>.
// Use textureGather as a workaround.
// Always choose the texel with the smallest coord (stored in the w component).
// Since this is only used for Stencil8 (1 channel), we only care about component idx == 0.
constexpr std::string_view kUintTextureCube = …;

// NOTE(review): contents of the kEncode*InU32 strings are elided in this view; presumably each
// one encodes a texel of the named format into a u32 - confirm.
constexpr std::string_view kEncodeRGBA8UnormInU32 = …;

constexpr std::string_view kEncodeRGBA8SnormInU32 = …;

// Stores and swizzles bgra8unorm texel values, then converts them to u32.
constexpr std::string_view kEncodeBGRA8UnormInU32 = …;

constexpr std::string_view kEncodeRG16FloatInU32 = …;

// Each thread is responsible for reading (packTexelCount) texels and packing them into a
// 4-byte u32.
constexpr std::string_view kCommonHead = …;

// NOTE(review): contents of kCommonStart / kCommonEnd are elided in this view; presumably the
// shared opening and closing parts of the shader body placed around the per-format strings -
// confirm.
constexpr std::string_view kCommonStart = …;

constexpr std::string_view kCommonEnd = …;

// NOTE(review): contents elided in this view; presumably packs Stencil8 texels (1 byte each)
// into a u32 - confirm.
constexpr std::string_view kPackStencil8ToU32 = …;

// T2B copies of the color formats R8Snorm and RG8Snorm don't require the offset to be a multiple
// of 4 bytes, which makes them more complicated than other formats.
// TODO(dawn:1886): potentially separate the "middle of the image" case
// and the "on the edge" case into different shaders and passes for better performance.
constexpr std::string_view kNonMultipleOf4OffsetStart = …;

// R8snorm: texelByte = 1; each thread reads 1 ~ 4 texels.
// Different scenarios are listed below.
// Legend (inferred from the examples): each diagram shows the 4 bytes of one u32, lowest byte
// first; `b` is a byte kept from the existing dst buffer contents, and `mask` has 0xff in the
// byte lanes that keep the dst buffer value.
//
// * In the middle of the row: reads 4 texels
//       |  x  | x+1 | x+2 | x+3 |
//
// * At the edge of the row: when offset % 4 > 0
//   - when copyWidth % bytesPerRow == 0 (compact row), reads 4 texels
//       e.g. offset = 1; copyWidth = 256;
//       | 255,y-1 | 0,y | 1,y | 2,y |
//   - when copyWidth % bytesPerRow > 0 || rowsPerImage > copyHeight (sparse row / sparse image)
//     One more thread is added to the end of each row,
//     reads 1 ~ 3 texels, reads dst buf values
//       e.g. offset = 1; copyWidth = 128; mask = 0xffffff00;
//       | 127,y-1 |  b  |  b  |  b  |
//   - when copyWidth % bytesPerRow > 0 && copyWidth + offset % 4 > bytesPerRow (special case)
//     reads 1 ~ 3 texels, reads dst buf values; mask = 0x0000ff00;
//       e.g. offset = 1; copyWidth = 255;
//       | 254,y-1 |  b  | 0,y | 1,y |
//
// * At the start of the whole copy:
//   - when offset % 4 == 0, reads 4 texels
//   - when offset % 4 > 0, reads 1 ~ 3 texels, reads dst buf values
//       e.g. offset = 1; mask = 0x000000ff;
//       |  b  |  0  |  1  |  2  |
//       e.g. offset = 1, copyWidth = 2; mask = 0xff0000ff;
//       |  b  |  0  |  1  |  b  |
//
// * At the end of the whole copy:
//   - reads 1 ~ 4 texels, reads dst buf values;
//       e.g. offset = 0; copyWidth = 256;
//       | 252 | 253 | 254 | 255 |
//       e.g. offset = 1; copyWidth = 256; mask = 0xffffff00;
//       | 255 |  b  |  b  |  b  |

constexpr std::string_view kPackR8ToU32 = …;

// RG8snorm: texelByte = 2; each thread reads 1 ~ 2 texels.
// Different scenarios are listed below.
// Legend (inferred from the examples): each diagram shows the two 2-byte halves of one u32,
// lower half first; `b` is a half kept from the existing dst buffer contents, and `mask` has
// 0xffff in the half that keeps the dst buffer value.
//
// * In the middle of the row: reads 2 texels
//       |    x    |   x+1   |
//
// * At the edge of the row: when offset % 4 > 0
//   - when copyWidth % bytesPerRow == 0 (compact row), reads 2 texels
//       e.g. offset = 2; copyWidth = 128;
//       | 127,y-1 |   0,y   |
//   - when copyWidth % bytesPerRow > 0 || rowsPerImage > copyHeight (sparse row / sparse image)
//     One more thread is added to the end of each row,
//     reads 1 texel, reads dst buf values
//       e.g. offset = 2; copyWidth = 64; mask = 0xffff0000;
//       |  63,y-1 |    b    |
//
// * At the start of the whole copy:
//   - when offset % 4 == 0, reads 2 texels
//   - when offset % 4 > 0, reads 1 texel, reads dst buf values
//       e.g. offset = 2; mask = 0x0000ffff;
//       |    b    |    0    |
//
// * At the end of the whole copy:
//   - reads 1 ~ 2 texels, reads dst buf values;
//       e.g. offset = 0; copyWidth = 128;
//       |   126   |   127   |
//       e.g. offset = 2; copyWidth = 128; mask = 0xffff0000;
//       |   127   |    b    |

constexpr std::string_view kPackRG8ToU32 = …;

// R16: texelByte = 2; each thread reads 1 ~ 2 texels.
// The general packing algorithm is similar to kPackRG8ToU32.
constexpr std::string_view kPackR16ToU32 = …;

// NOTE(review): contents elided in this view; presumably packs RG16 texels (4 bytes each), one
// texel per u32 - confirm.
constexpr std::string_view kPackRG16ToU32 = …;

// Load RGBA16 and pack each texel into 2 u32 values (4 bytes each).
constexpr std::string_view kLoadRGBA16ToU32 = …;

// On GL, the ShaderF16 extension is only enabled by GL_AMD_gpu_shader_half_float,
// so we should not use it generally for the emulation.
// As a result we use f32 and array<u32> to do all the math and byte manipulation.
// If we had a 2-byte scalar type (f16, u16), writing to the storage buffer would be a bit
// easier.
constexpr std::string_view kPackDepth16UnormToU32 = …;

// Storing rgba texel values
// later called by encodeVectorInU32General to convert to u32.
// NOTE(review): "called by" presumably means the loaded texel values are handed to
// encodeVectorInU32General for the u32 conversion - confirm against the string contents.
constexpr std::string_view kPackRGBAToU32 = …;

// Stores rgb9e5ufloat texel values.
// In this format a float is represented as
// 2^(exponent - bias) * (mantissa / 2^numMantissaBits)
// The packing algorithm is from:
// https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
//
// Note: multiple byte patterns can represent the same value in this format.
// e.g.
// 0x0a090807 and 0x0412100e both unpack to
// [8.344650268554688e-7, 0.000015735626220703125, 0.000015497207641601562]
// So the bytes copied via blit could differ from the bytes stored in the source texture.
constexpr std::string_view kEncodeRGB9E5UfloatInU32 = …;

// Directly loads R32Float values into dst_buf.
// No bit manipulation or packing is needed.
constexpr std::string_view kLoadR32Float = …;

// Returns the compute pipeline used for the texture-to-buffer blit of `src` when it is viewed
// with `viewDimension`, or an error through ResultOrError.
// NOTE(review): the body is elided in this view. Going by the name, an existing pipeline is
// presumably reused and a new one is built from the shader strings in this file only when
// needed - confirm where the pipeline is cached and what the cache key is.
ResultOrError<Ref<ComputePipelineBase>> GetOrCreateTextureToBufferPipeline(
    DeviceBase* device,
    const TextureCopy& src,
    wgpu::TextureViewDimension viewDimension) { … }

}  // anonymous namespace

// Reports whether `format` can be handled by the texture-to-buffer blit in this file.
// NOTE(review): the body is elided in this view, so the exact set of accepted formats is not
// visible here - confirm it stays in sync with the per-format shader strings.
bool IsFormatSupportedByTextureToBufferBlit(wgpu::TextureFormat format) { … }

// Copies the `copyExtent` region of the texture described by `src` into the buffer described by
// `dst`; failures are reported through the MaybeError return value.
// NOTE(review): the body is elided in this view. Presumably the copy is recorded on
// `commandEncoder` as a compute pass using the pipeline from GetOrCreateTextureToBufferPipeline,
// and callers are expected to check IsFormatSupportedByTextureToBufferBlit first - confirm.
MaybeError BlitTextureToBuffer(DeviceBase* device,
                               CommandEncoder* commandEncoder,
                               const TextureCopy& src,
                               const BufferCopy& dst,
                               const Extent3D& copyExtent) { … }

}  // namespace dawn::native
chromium/third_party/dawn/src/dawn/native/BlitTextureToBuffer.cpp