// SPDX-License-Identifier: MIT
/*
 * Copyright © 2008-2015 Intel Corporation
 */

#include <linux/highmem.h>

#include "display/intel_display.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_scatterlist.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt_regs.h"
#include "intel_mchbar_regs.h"

/**
 * DOC: fence register handling
 *
 * Important to avoid confusion: "fences" in the i915 driver are not execution
 * fences used to track command completion but hardware detiler objects which
 * wrap a given range of the global GTT. Each platform has only a fairly limited
 * set of these objects.
 *
 * Fences are used to detile GTT memory mappings. They're also connected to the
 * hardware frontbuffer render tracking and hence interact with frontbuffer
 * compression. Furthermore on older platforms fences are required for tiled
 * objects used by the display engine. They can also be used by the render
 * engine - they're required for blitter commands and are optional for render
 * commands. But on gen4+ both display (with the exception of fbc) and rendering
 * have their own tiling state bits and don't need fences.
 *
 * Also note that fences only support X and Y tiling and hence can't be used for
 * the fancier new tiling formats like W, Ys and Yf.
 *
 * Finally note that because fences are such a restricted resource they're
 * dynamically associated with objects. Furthermore fence state is committed to
 * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
 * explicitly call i915_gem_object_get_fence() to synchronize fencing status
 * for cpu access. Also note that some code wants an unfenced view, for those
 * cases the fence can be removed forcefully with i915_gem_object_put_fence().
 *
 * Internally these functions will synchronize with userspace access by removing
 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
 */

#define pipelined …

static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence)
{
	…
}

static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence)
{
	…
}

static void i965_write_fence_reg(struct i915_fence_reg *fence)
{
	…
}

static void i915_write_fence_reg(struct i915_fence_reg *fence)
{
	…
}

static void i830_write_fence_reg(struct i915_fence_reg *fence)
{
	…
}

static void fence_write(struct i915_fence_reg *fence)
{
	…
}

static bool gpu_uses_fence_registers(struct i915_fence_reg *fence)
{
	…
}

static int fence_update(struct i915_fence_reg *fence, struct i915_vma *vma)
{
	…
}

/**
 * i915_vma_revoke_fence - force-remove fence for a VMA
 * @vma: vma to map linearly (not through a fence reg)
 *
 * This function force-removes any fence from the given object, which is useful
 * if the kernel wants to do untiled GTT access.
 */
void i915_vma_revoke_fence(struct i915_vma *vma)
{
	…
}

static bool fence_is_active(const struct i915_fence_reg *fence)
{
	…
}

static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
{
	…
}

int __i915_vma_pin_fence(struct i915_vma *vma)
{
	…
}
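/*
 * A minimal usage sketch of the fencing API described above, assuming the
 * vma is already pinned in the GGTT and the caller holds any locks the
 * driver requires; example_tiled_access() is a hypothetical caller, not
 * part of the driver.
 */
static int __maybe_unused example_tiled_access(struct i915_vma *vma)
{
	int err;

	/* Assign (or steal) a fence register and pin it to @vma. */
	err = i915_vma_pin_fence(vma);
	if (err)
		return err;

	/*
	 * ... access the object through its (now detiled) GTT mapping ...
	 */

	/* Release the pin; the fence stays attached until it is stolen. */
	i915_vma_unpin_fence(vma);
	return 0;
}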
/**
 * i915_vma_pin_fence - set up fencing for a vma
 * @vma: vma to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @vma,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */
int i915_vma_pin_fence(struct i915_vma *vma)
{
	…
}

/**
 * i915_reserve_fence - Reserve a fence for vGPU
 * @ggtt: Global GTT
 *
 * This function walks the fence regs looking for a free one and removes
 * it from the fence_list. It is used to reserve a fence for vGPU to use.
 */
struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt)
{
	…
}

/**
 * i915_unreserve_fence - Reclaim a reserved fence
 * @fence: the fence reg
 *
 * This function adds a reserved fence register from vGPU back to the
 * fence_list.
 */
void i915_unreserve_fence(struct i915_fence_reg *fence)
{
	…
}

/**
 * intel_ggtt_restore_fences - restore fence state
 * @ggtt: Global GTT
 *
 * Restore the hw fence state to match the software tracking again, to be
 * called after a gpu reset and on resume. Note that on runtime suspend we
 * only cancel the fences, to be reacquired by the user later.
 */
void intel_ggtt_restore_fences(struct i915_ggtt *ggtt)
{
	…
}

/**
 * DOC: tiling swizzling details
 *
 * The idea behind tiling is to increase cache hit rates by rearranging
 * pixel data so that a group of pixel accesses are in the same cacheline.
 * Performance improvements from doing this on the back/depth buffer are on
 * the order of 30%.
 *
 * Intel architectures make this somewhat more complicated, though, by
 * adjustments made to addressing of data when the memory is in interleaved
 * mode (matched pairs of DIMMS) to improve memory bandwidth.
 * For interleaved memory, the CPU sends every sequential 64 bytes
 * to an alternate memory channel so it can get the bandwidth from both.
 *
 * The GPU also rearranges its accesses for increased bandwidth to interleaved
 * memory, and it matches what the CPU does for non-tiled. However, when tiled
 * it does it a little differently, since one walks addresses not just in the
 * X direction but also Y. So, along with alternating channels when bit
 * 6 of the address flips, it also alternates when other bits flip -- Bits 9
 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
 * are common to both the 915 and 965-class hardware.
 *
 * The CPU also sometimes XORs in higher bits as well, to improve
 * bandwidth doing strided access like we do so frequently in graphics. This
 * is called "Channel XOR Randomization" in the MCH documentation. The result
 * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
 * decode.
 *
 * All of this bit 6 XORing has an effect on our memory management,
 * as we need to make sure that the 3d driver can correctly address object
 * contents.
 *
 * If we don't have interleaved memory, all tiling is safe and no swizzling is
 * required.
 *
 * When bit 17 is XORed in, we simply refuse to tile at all. Bit
 * 17 is not just a page offset, so as we page an object out and back in,
 * individual pages in it will have different bit 17 addresses, resulting in
 * each 64 bytes being swapped with its neighbor!
 *
 * Otherwise, if interleaved, we have to tell the 3d driver what address
 * swizzling it needs to do, since it's writing with the CPU to the pages
 * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
 * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
 * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
 * to match what the GPU expects.
 */
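/*
 * An illustrative sketch of the cumulative swizzle described above, not
 * code the driver itself uses: for the common 9/10 swizzle mode the CPU
 * must flip bit 6 of a linear offset whenever bits 9 and 10 differ in
 * parity, i.e. bit6' = bit6 ^ bit9 ^ bit10. The helper name is
 * hypothetical.
 */
static inline unsigned long __maybe_unused
example_swizzle_bit_6_9_10(unsigned long offset)
{
	/* Shift bits 9 and 10 down to bit position 6 and XOR them in. */
	return offset ^ (((offset >> 3) ^ (offset >> 4)) & (1ul << 6));
}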
/**
 * detect_bit_6_swizzle - detect bit 6 swizzling pattern
 * @ggtt: Global GTT
 *
 * Detects bit 6 swizzling of address lookup between IGD access and CPU
 * access through main memory.
 */
static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
{
	…
}

/*
 * Swap every 64 bytes of this page around, to account for it having a new
 * bit 17 of its physical address and therefore being interpreted differently
 * by the GPU.
 */
static void swizzle_page(struct page *page)
{
	…
}

/**
 * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function fixes up the swizzling in case any page frame number for this
 * object has changed in bit 17 since the state was saved with
 * i915_gem_object_save_bit_17_swizzle().
 *
 * This is called when pinning backing storage again, since the kernel is free
 * to move unpinned backing storage around (either by directly moving pages or
 * by swapping them out and back in again).
 */
void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
				       struct sg_table *pages)
{
	…
}

/**
 * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
 * @obj: i915 GEM buffer object
 * @pages: the scattergather list of physical pages
 *
 * This function saves bit 17 of each page frame number so that swizzling
 * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This
 * must be called before the backing storage can be unpinned.
 */
void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
					 struct sg_table *pages)
{
	…
}

void intel_ggtt_init_fences(struct i915_ggtt *ggtt)
{
	…
}

void intel_ggtt_fini_fences(struct i915_ggtt *ggtt)
{
	…
}

void intel_gt_init_swizzling(struct intel_gt *gt)
{
	…
}
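/*
 * An illustrative restatement of the fixup swizzle_page() above performs,
 * assuming a kmap'ed page of PAGE_SIZE bytes; this sketch is not part of
 * the driver. When bit 17 of a page's physical address has flipped, the
 * two 64-byte halves of every 128-byte block have traded places, so the
 * fixup simply swaps them back.
 */
static void __maybe_unused example_swap_bit17_halves(char *vaddr)
{
	char tmp[64];
	int i;

	for (i = 0; i < PAGE_SIZE; i += 128) {
		memcpy(tmp, vaddr + i, 64);
		memcpy(vaddr + i, vaddr + i + 64, 64);
		memcpy(vaddr + i + 64, tmp, 64);
	}
}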