linux/drivers/gpu/drm/panthor/panthor_sched.c

// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>

#include <linux/build_bug.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dma-resv.h>
#include <linux/firmware.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include "panthor_devfreq.h"
#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

/**
 * DOC: Scheduler
 *
 * Mali CSF hardware adopts a firmware-assisted scheduling model, where
 * the firmware takes care of some of the scheduling aspects.
 *
 * Scheduling happens at the scheduling-group level: each group contains
 * 1 to N queues (N is FW/hardware dependent, and exposed through the
 * firmware interface). Each queue is assigned a command stream ring
 * buffer, which serves as a way to get jobs submitted to the GPU, among
 * other things.
 *
 * The firmware can schedule a maximum of M groups (M is FW/hardware
 * dependent, and exposed through the firmware interface). Past this
 * maximum number of groups, the kernel must take care of rotating the
 * groups passed to the firmware so every group gets a chance to have
 * its queues scheduled for execution.
 *
 * The current implementation only supports kernel-mode queues. In other
 * words, userspace doesn't have access to the ring-buffer. Instead,
 * userspace passes indirect command stream buffers that are called from
 * the queue ring-buffer by the kernel using a pre-defined sequence of
 * command stream instructions, to ensure the userspace driver always
 * gets consistent results (cache maintenance, synchronization, ...).
 *
 * We rely on the drm_gpu_scheduler framework to deal with job
 * dependencies and submission. As with any other driver dealing with a
 * FW scheduler, we use the 1:1 entity:scheduler mode, such that each
 * entity has its own job scheduler. When a job is ready to be executed
 * (all its dependencies are met), it is pushed to the appropriate
 * queue ring-buffer, and the group is scheduled for execution if it
 * wasn't already active.
 *
 * Kernel-side group scheduling is timeslice-based. When there are fewer
 * groups than slots, the periodic tick is disabled and we just let the
 * FW schedule the active groups. When there are more groups than slots,
 * we give each group a chance to execute for a given amount of time,
 * then re-evaluate and pick new groups to schedule. The group selection
 * algorithm is based on priority+round-robin.
 *
 * Even though user-mode queues are out of scope right now, the current
 * design takes them into account by avoiding any guess on the
 * group/queue state that would be based on information we wouldn't have
 * if userspace was in charge of the ring-buffer. That's also one of the
 * reasons we don't do 'cooperative' scheduling (encoding FW group slot
 * reservation as a dma_fence that would be returned from the
 * drm_sched_backend_ops::prepare_job() hook, and treating group rotation
 * as a queue of waiters, ordered by job submission order). This approach
 * would work for kernel-mode queues, but would make user-mode queues a
 * lot more complicated to retrofit.
 */
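
/*
 * Illustrative sketch of the priority+round-robin group selection
 * described above. The types and fields below (example_prio,
 * example_group, the run-queue array, csg_slot_count) are made-up
 * placeholders, not the driver's actual data structures; only the
 * picking strategy is taken from the documentation above.
 */
enum example_prio {
	EXAMPLE_PRIO_LOW,
	EXAMPLE_PRIO_MEDIUM,
	EXAMPLE_PRIO_HIGH,
	EXAMPLE_PRIO_COUNT,
};

struct example_group {
	struct list_head node;
};

static void
example_pick_groups(struct list_head run_queues[EXAMPLE_PRIO_COUNT],
		    struct list_head *picked, u32 csg_slot_count)
{
	u32 picked_count = 0;
	int prio;

	/* Walk the run queues from highest to lowest priority. */
	for (prio = EXAMPLE_PRIO_COUNT - 1;
	     prio >= 0 && picked_count < csg_slot_count; prio--) {
		struct example_group *group, *tmp;

		list_for_each_entry_safe(group, tmp, &run_queues[prio], node) {
			if (picked_count == csg_slot_count)
				break;

			/*
			 * Picked groups are moved off their run queue. When a
			 * group's timeslice expires it would be re-inserted at
			 * the tail of its run queue (not shown), which is what
			 * gives the round-robin behavior within a priority.
			 */
			list_move_tail(&group->node, picked);
			picked_count++;
		}
	}
}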

#define JOB_TIMEOUT_MS

#define MIN_CS_PER_CSG

#define MIN_CSGS
#define MAX_CSG_PRIO

struct panthor_group;

/**
 * struct panthor_csg_slot - Command stream group slot
 *
 * This represents a FW slot for a scheduling group.
 */
struct panthor_csg_slot {};

/**
 * enum panthor_csg_priority - Group priority
 */
enum panthor_csg_priority {};

/**
 * struct panthor_scheduler - Object used to manage the scheduler
 */
struct panthor_scheduler {};

/**
 * struct panthor_syncobj_32b - 32-bit FW synchronization object
 */
struct panthor_syncobj_32b {};

/**
 * struct panthor_syncobj_64b - 64-bit FW synchronization object
 */
struct panthor_syncobj_64b {};

/**
 * struct panthor_queue - Execution queue
 */
struct panthor_queue {};

/**
 * enum panthor_group_state - Scheduling group state.
 */
enum panthor_group_state {};

/**
 * struct panthor_group - Scheduling group object
 */
struct panthor_group {};

/**
 * group_queue_work() - Queue a group work
 * @group: Group to queue the work for.
 * @wname: Work name.
 *
 * Grabs a ref and queues a work item to the scheduler workqueue. If
 * the work was already queued, we release the reference we grabbed.
 *
 * Work callbacks must release the reference we grabbed here.
 */
#define group_queue_work(group, wname)
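
/*
 * Minimal sketch of the ref+queue pattern documented above, assuming
 * the scheduler workqueue lives at (grp)->ptdev->scheduler->wq and the
 * work item is named <wname>_work (both assumptions made for
 * illustration). queue_work() returns false when the work is already
 * pending, which is when the extra reference gets dropped.
 */
#define example_group_queue_work(grp, wname) \
	do { \
		group_get(grp); \
		if (!queue_work((grp)->ptdev->scheduler->wq, \
				&(grp)->wname ## _work)) \
			group_put(grp); \
	} while (0)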

/**
 * sched_queue_work() - Queue a scheduler work.
 * @sched: Scheduler object.
 * @wname: Work name.
 *
 * Conditionally queues a scheduler work if no reset is pending/in-progress.
 */
#define sched_queue_work(sched, wname)

/**
 * sched_queue_delayed_work() - Queue a scheduler delayed work.
 * @sched: Scheduler object.
 * @wname: Work name.
 * @delay: Work delay in jiffies.
 *
 * Conditionally queues a scheduler delayed work if no reset is
 * pending/in-progress.
 */
#define sched_queue_delayed_work(sched, wname, delay)
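
/*
 * Sketch of the conditional queuing described by the two macros above,
 * assuming the reset-pending state is tracked in an atomic_t at
 * (sched)->reset.in_progress and the workqueue at (sched)->wq (both
 * assumptions made for illustration). Nothing is queued while a reset
 * is pending/in-progress.
 */
#define example_sched_queue_work(sched, wname) \
	do { \
		if (!atomic_read(&(sched)->reset.in_progress)) \
			queue_work((sched)->wq, &(sched)->wname ## _work); \
	} while (0)

#define example_sched_queue_delayed_work(sched, wname, delay) \
	do { \
		if (!atomic_read(&(sched)->reset.in_progress)) \
			mod_delayed_work((sched)->wq, \
					 &(sched)->wname ## _work, delay); \
	} while (0)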

/*
 * We currently set the maximum number of groups per file to an arbitrarily
 * low value, but this can be updated if we need more.
 */
#define MAX_GROUPS_PER_POOL

/**
 * struct panthor_group_pool - Group pool
 *
 * Each file gets assigned a group pool.
 */
struct panthor_group_pool {};
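
/*
 * Sketch of a per-file pool built on an allocating xarray, which is one
 * way to hand out small integer group handles (an assumption for
 * illustration; the real struct contents are elided above). Needs
 * <linux/xarray.h>.
 */
struct example_group_pool {
	struct xarray xa;
};

static void example_pool_init(struct example_group_pool *pool)
{
	/* XA_FLAGS_ALLOC1 makes xa_alloc() hand out IDs starting at 1. */
	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
}

static int example_pool_add_group(struct example_group_pool *pool,
				  struct panthor_group *group, u32 *handle)
{
	return xa_alloc(&pool->xa, handle, group,
			XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
}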

/**
 * struct panthor_job - Used to manage GPU job
 */
struct panthor_job {};

static void
panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
{}

static void *
panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
{}

static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
{}

static void group_release_work(struct work_struct *work)
{}

static void group_release(struct kref *kref)
{}

static void group_put(struct panthor_group *group)
{}

static struct panthor_group *
group_get(struct panthor_group *group)
{}

/**
 * group_bind_locked() - Bind a group to a group slot
 * @group: Group.
 * @csg_id: Slot.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static int
group_bind_locked(struct panthor_group *group, u32 csg_id)
{}

/**
 * group_unbind_locked() - Unbind a group from a slot.
 * @group: Group to unbind.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static int
group_unbind_locked(struct panthor_group *group)
{}

/**
 * cs_slot_prog_locked() - Program a queue slot
 * @ptdev: Device.
 * @csg_id: Group slot ID.
 * @cs_id: Queue slot ID.
 *
 * Program a queue slot with the queue information so things can start being
 * executed on this queue.
 *
 * The group slot must have a group bound to it already (group_bind_locked()).
 */
static void
cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{}

/**
 * cs_slot_reset_locked() - Reset a queue slot
 * @ptdev: Device.
 * @csg_id: Group slot.
 * @cs_id: Queue slot.
 *
 * Change the queue slot state to STOP and suspend the queue timeout if
 * the queue is not blocked.
 *
 * The group slot must have a group bound to it (group_bind_locked()).
 */
static int
cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{}

/**
 * csg_slot_sync_priority_locked() - Synchronize the group slot priority
 * @ptdev: Device.
 * @csg_id: Group slot ID.
 *
 * Group slot priority update happens asynchronously. When we receive a
 * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can
 * reflect it to our panthor_csg_slot object.
 */
static void
csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
{}

/**
 * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
 * @ptdev: Device.
 * @csg_id: Group slot.
 * @cs_id: Queue slot.
 *
 * Queue state is updated on group suspend or STATUS_UPDATE event.
 */
static void
cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
{}

static void
csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
{}

static void
csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
{}

static int
csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
{}

static void
cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
				   u32 csg_id, u32 cs_id)
{}

static void
cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
				   u32 csg_id, u32 cs_id)
{}

static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
{}

static void group_tiler_oom_work(struct work_struct *work)
{}

static void
cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
				       u32 csg_id, u32 cs_id)
{}

static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
				       u32 csg_id, u32 cs_id)
{}

static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id)
{}

static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
{}

static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
					u32 csg_id)
{}

static void
csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
{}

static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
{}

static void sched_process_idle_event_locked(struct panthor_device *ptdev)
{}

/**
 * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
 * @ptdev: Device.
 */
static void sched_process_global_irq_locked(struct panthor_device *ptdev)
{}

static void process_fw_events_work(struct work_struct *work)
{}

/**
 * panthor_sched_report_fw_events() - Report FW events to the scheduler.
 * @ptdev: Device.
 * @events: Events to process.
 */
void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
{}
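
/*
 * Plausible sketch of the hand-off to process_fw_events_work():
 * accumulate the event bits atomically, then kick the FW-events work.
 * The sched->fw_events field and the fw_events work name are
 * assumptions made for illustration (the scheduler struct contents are
 * elided above).
 */
static void example_report_fw_events(struct panthor_scheduler *sched, u32 events)
{
	atomic_or(events, &sched->fw_events);
	sched_queue_work(sched, fw_events);
}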

static const char *fence_get_driver_name(struct dma_fence *fence)
{}

static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
{}

static const struct dma_fence_ops panthor_queue_fence_ops =;
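
/*
 * Minimal sketch of what the fence ops initializer above could look
 * like, wiring in the two helpers declared right before it. The real
 * initializer (elided above) may set more hooks than these two.
 */
static const struct dma_fence_ops example_queue_fence_ops = {
	.get_driver_name = fence_get_driver_name,
	.get_timeline_name = queue_fence_get_timeline_name,
};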

struct panthor_csg_slots_upd_ctx {};

static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
{}

static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
				    struct panthor_csg_slots_upd_ctx *ctx,
				    u32 csg_id, u32 value, u32 mask)
{}

static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
				     struct panthor_csg_slots_upd_ctx *ctx)
{}
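
/*
 * Sketch of the request-batching idea behind the update context:
 * csgs_upd_ctx_queue_reqs() accumulates per-slot (value, mask) pairs,
 * and the apply step writes them to the FW interface and waits for the
 * acks (not shown). All names below (EXAMPLE_MAX_CSGS, reqs[],
 * update_mask) are illustrative assumptions.
 */
#define EXAMPLE_MAX_CSGS 32

struct example_csg_slots_upd_ctx {
	u32 update_mask;
	struct {
		u32 value;
		u32 mask;
	} reqs[EXAMPLE_MAX_CSGS];
};

static void example_upd_ctx_queue_reqs(struct example_csg_slots_upd_ctx *ctx,
				       u32 csg_id, u32 value, u32 mask)
{
	/* Merge the new request bits with anything already queued for this slot. */
	ctx->reqs[csg_id].value = (ctx->reqs[csg_id].value & ~mask) | (value & mask);
	ctx->reqs[csg_id].mask |= mask;
	ctx->update_mask |= BIT(csg_id);
}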

struct panthor_sched_tick_ctx {};

static bool
tick_ctx_is_full(const struct panthor_scheduler *sched,
		 const struct panthor_sched_tick_ctx *ctx)
{}

static bool
group_is_idle(struct panthor_group *group)
{}

static bool
group_can_run(struct panthor_group *group)
{}

static void
tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
			       struct panthor_sched_tick_ctx *ctx,
			       struct list_head *queue,
			       bool skip_idle_groups,
			       bool owned_by_tick_ctx)
{}

static void
tick_ctx_insert_old_group(struct panthor_scheduler *sched,
			  struct panthor_sched_tick_ctx *ctx,
			  struct panthor_group *group,
			  bool full_tick)
{}

static void
tick_ctx_init(struct panthor_scheduler *sched,
	      struct panthor_sched_tick_ctx *ctx,
	      bool full_tick)
{}

#define NUM_INSTRS_PER_SLOT

static void
group_term_post_processing(struct panthor_group *group)
{}

static void group_term_work(struct work_struct *work)
{}

static void
tick_ctx_cleanup(struct panthor_scheduler *sched,
		 struct panthor_sched_tick_ctx *ctx)
{}

static void
tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
{}

static u64
tick_ctx_update_resched_target(struct panthor_scheduler *sched,
			       const struct panthor_sched_tick_ctx *ctx)
{}

static void tick_work(struct work_struct *work)
{}
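
/*
 * Sketch of the timeslice decision described in the DOC section: when
 * every runnable group fits in a FW slot, the tick is simply not
 * re-armed; otherwise the next tick is scheduled one timeslice away.
 * The runnable_count/csg_slot_count/tick_period parameters and the
 * "tick" work name are assumptions made for illustration.
 */
static void example_arm_next_tick(struct panthor_scheduler *sched,
				  u32 runnable_count, u32 csg_slot_count,
				  unsigned long tick_period)
{
	/* Everything fits in the FW slots: let the FW schedule, no tick needed. */
	if (runnable_count <= csg_slot_count)
		return;

	sched_queue_delayed_work(sched, tick, tick_period);
}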

static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
{}

static void sync_upd_work(struct work_struct *work)
{}

static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
{}

static void queue_stop(struct panthor_queue *queue,
		       struct panthor_job *bad_job)
{}

static void queue_start(struct panthor_queue *queue)
{}

static void panthor_group_stop(struct panthor_group *group)
{}

static void panthor_group_start(struct panthor_group *group)
{}

static void panthor_sched_immediate_tick(struct panthor_device *ptdev)
{}

/**
 * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler.
 * @ptdev: Device.
 */
void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
{}

void panthor_sched_resume(struct panthor_device *ptdev)
{}

void panthor_sched_suspend(struct panthor_device *ptdev)
{}

void panthor_sched_pre_reset(struct panthor_device *ptdev)
{}

void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
{}

static void group_sync_upd_work(struct work_struct *work)
{}

static struct dma_fence *
queue_run_job(struct drm_sched_job *sched_job)
{}

static enum drm_gpu_sched_stat
queue_timedout_job(struct drm_sched_job *sched_job)
{}

static void queue_free_job(struct drm_sched_job *sched_job)
{}

static const struct drm_sched_backend_ops panthor_queue_sched_ops =;
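
/*
 * Sketch of the scheduler ops initializer above, wiring the three
 * callbacks defined in this file. The real initializer (elided above)
 * may also set other hooks.
 */
static const struct drm_sched_backend_ops example_queue_sched_ops = {
	.run_job = queue_run_job,
	.timedout_job = queue_timedout_job,
	.free_job = queue_free_job,
};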

static struct panthor_queue *
group_create_queue(struct panthor_group *group,
		   const struct drm_panthor_queue_create *args)
{}

int panthor_group_create(struct panthor_file *pfile,
			 const struct drm_panthor_group_create *group_args,
			 const struct drm_panthor_queue_create *queue_args)
{}

int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
{}

int panthor_group_get_state(struct panthor_file *pfile,
			    struct drm_panthor_group_get_state *get_state)
{}

int panthor_group_pool_create(struct panthor_file *pfile)
{}

void panthor_group_pool_destroy(struct panthor_file *pfile)
{}

static void job_release(struct kref *ref)
{}

struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
{}

void panthor_job_put(struct drm_sched_job *sched_job)
{}

struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
{}

struct drm_sched_job *
panthor_job_create(struct panthor_file *pfile,
		   u16 group_handle,
		   const struct drm_panthor_queue_submit *qsubmit)
{}

void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
{}
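
/*
 * Sketch of the usual resv-update pattern for a job: attach the job's
 * finished fence to a reservation object that was locked (and had a
 * fence slot reserved) through drm_exec. Which resv object and which
 * usage flag the driver actually picks is not shown here; both are
 * assumptions made for illustration.
 */
static void example_job_update_resv(struct drm_sched_job *sched_job,
				    struct dma_resv *resv)
{
	dma_resv_add_fence(resv, &sched_job->s_fence->finished,
			   DMA_RESV_USAGE_BOOKKEEP);
}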

void panthor_sched_unplug(struct panthor_device *ptdev)
{}

static void panthor_sched_fini(struct drm_device *ddev, void *res)
{}

int panthor_sched_init(struct panthor_device *ptdev)
{}
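
/*
 * panthor_sched_fini() has the drmres_release_t signature, so teardown
 * can be tied to the DRM device lifetime with drm_managed. Sketch of
 * the registration that would sit at the end of panthor_sched_init();
 * the ptdev->base drm_device member is an assumption made for
 * illustration.
 */
static int example_register_sched_fini(struct panthor_device *ptdev,
				       struct panthor_scheduler *sched)
{
	return drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
}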