command_submission.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2021 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_TYPE_MASK …


#define MAX_TS_ITER_NUM …

/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
 */
enum hl_cs_wait_status { … };

/*
 * Data used while handling wait/timestamp nodes.
 * The purpose of this struct is to store the needed data for both operations
 * in one variable instead of passing a large number of arguments to functions.
 */
struct wait_interrupt_data { … };

static void job_wq_completion(struct work_struct *work);
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);

static void hl_push_cs_outcome(struct hl_device *hdev,
			       struct hl_cs_outcome_store *outcome_store,
			       u64 seq, ktime_t ts, int error)
{ … }

static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
			       u64 seq, ktime_t *ts, int *error)
{ … }

static void hl_sob_reset(struct kref *ref)
{ … }

void hl_sob_reset_error(struct kref *ref)
{ … }

void hw_sob_put(struct hl_hw_sob *hw_sob)
{ … }

static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
{ … }

void hw_sob_get(struct hl_hw_sob *hw_sob)
{ … }

/**
 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, each bit represents a sob offset from sob base
 * @mask: generated mask
 *
 * Return: 0 if given parameters are valid
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{ … }

static void hl_fence_release(struct kref *kref)
{ … }

void hl_fence_put(struct hl_fence *fence)
{ … }

void hl_fences_put(struct hl_fence **fence, int len)
{ … }

void hl_fence_get(struct hl_fence *fence)
{ … }

static void hl_fence_init(struct hl_fence *fence, u64 sequence)
{ … }

void cs_get(struct hl_cs *cs)
{ … }

static int cs_get_unless_zero(struct hl_cs *cs)
{ … }

static void cs_put(struct hl_cs *cs)
{ … }

static void cs_job_do_release(struct kref *ref)
{ … }

static void hl_cs_job_put(struct hl_cs_job *job)
{ … }

bool cs_needs_completion(struct hl_cs *cs)
{ … }

bool cs_needs_timeout(struct hl_cs *cs)
{ … }

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{ … }

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job        : pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{ … }

static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
{ … }

/*
 * hl_staged_cs_find_first - locate the first CS in this staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Find and return a CS pointer with the given sequence
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{ … }

/*
 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
 *
 * @hdev: pointer to device structure
 * @cs: staged submission member
 *
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{ … }

/*
 * staged_cs_get - get CS reference if this CS is a part of a staged CS
 *
 * @hdev: pointer to device structure
 * @cs: current CS
 *
 * Increment CS reference for every CS in this staged submission except for
 * the CS which gets completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{ … }

/*
 * staged_cs_put - put a CS in case it is part of staged submission
 *
 * @hdev: pointer to device structure
 * @cs: CS to put
 *
 * This function decrements a CS reference (for a non completion CS)
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{ … }

static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{ … }

/*
 * force_complete_multi_cs - complete all contexts that wait on multi-CS
 *
 * @hdev: pointer to habanalabs device structure
 */
static void force_complete_multi_cs(struct hl_device *hdev)
{ … }

/*
 * complete_multi_cs - complete all waiting entities on multi-CS
 *
 * @hdev: pointer to habanalabs device structure
 * @cs: CS structure
 * The function signals a waiting entity that has overlapping stream masters
 * with the completed CS.
 * For example:
 * - a completed CS worked on stream master QID 4, multi CS completion
 *   is actively waiting on stream master QIDs 3, 5. don't send signal as no
 *   common stream master QID
 * - a completed CS worked on stream master QID 4, multi CS completion
 *   is actively waiting on stream master QIDs 3, 4. send signal as stream
 *   master QID 4 is common
 */
static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
{ … }

static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
					struct hl_cs *cs,
					struct hl_cs_compl *hl_cs_cmpl)
{ … }

static void cs_do_release(struct kref *ref)
{ … }

static void cs_timedout(struct work_struct *work)
{ … }

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, u64 user_sequence,
			struct hl_cs **cs_new, u32 flags, u32 timeout)
{ … }

static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{ … }

/*
 * release_reserved_encaps_signals() - release reserved encapsulated signals.
 * @hdev: pointer to habanalabs device structure
 *
 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
 * For these signals, the refcount of the H/W SOB which was taken at the
 * reservation also needs to be put.
 */
static void release_reserved_encaps_signals(struct hl_device *hdev)
{ … }

void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
{ … }

static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{ … }

void hl_release_pending_user_interrupts(struct hl_device *hdev)
{ … }

static void force_complete_cs(struct hl_device *hdev)
{ … }

void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
{ … }

static void job_wq_completion(struct work_struct *work)
{ … }

static void cs_completion(struct work_struct *work)
{ … }

u32 hl_get_active_cs_num(struct hl_device *hdev)
{ … }

static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{ … }

static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_mem_mgr *mmg,
					struct hl_cs_chunk *chunk)
{ … }

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{ … }

static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
{ … }

static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
{ … }

static int hl_cs_copy_chunk_array(struct hl_device *hdev,
					struct hl_cs_chunk **cs_chunk_array,
					void __user *chunks, u32 num_chunks,
					struct hl_ctx *ctx)
{ … }

static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
				u64 sequence, u32 flags,
				u32 encaps_signal_handle)
{ … }

static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
{ … }

static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq, u32 flags,
				u32 encaps_signals_handle, u32 timeout,
				u16 *signal_initial_sob_count)
{ … }

static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
				u64 *cs_seq)
{ … }

/*
 * hl_cs_signal_sob_wraparound_handler: handle SOB value wraparound case.
 * if the SOB value reaches the max value move to the other SOB reserved
 * to the queue.
 * @hdev: pointer to device structure
 * @q_idx: stream queue index
 * @hw_sob: the H/W SOB used in this signal CS.
 * @count: signals count
 * @encaps_sig: tells whether it's reservation for encaps signals or not.
 *
 * Note that this function must be called while hw_queues_lock is taken.
 */
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)

{ … }

static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
		bool encaps_signals)
{ … }

static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
{ … }

static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
				u32 q_idx, u32 count,
				u32 *handle_id, u32 *sob_addr,
				u32 *signals_count)
{ … }

static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
{ … }

static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, u32 flags, u32 timeout,
				u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{ … }

static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
						u32 num_engine_cores, u32 core_command)
{ … }

static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
						u32 num_engines, enum hl_engine_command command)
{ … }

static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
{ … }

int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{ … }

static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
				enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp)
{ … }

/*
 * hl_cs_poll_fences - iterate CS fences to check for CS completion
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 *
 * The function iterates over all CS sequences in the list and sets a bit in
 * completion_bitmap for each completed CS.
 * While iterating, the function sets the stream map of each fence in the fence
 * array in the completion QID stream map to be used by CSs to perform
 * completion to the multi-CS context.
 * This function shall be called after taking context ref
 */
static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{ … }

static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
				enum hl_cs_wait_status *status, s64 *timestamp)
{ … }

static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{ … }

/*
 * hl_wait_multi_cs_completion_init - init completion structure
 *
 * @hdev: pointer to habanalabs device structure
 *
 * @return valid completion struct pointer on success, otherwise error pointer
 *
 * Up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
 * The function gets the first available completion (by marking it "used")
 * and initializes its values.
 */
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{ … }

/*
 * hl_wait_multi_cs_completion_fini - return completion structure and set as
 *                                    unused
 *
 * @mcs_compl: pointer to the completion structure
 */
static void hl_wait_multi_cs_completion_fini(
					struct multi_cs_completion *mcs_compl)
{ … }

/*
 * hl_wait_multi_cs_completion - wait for first CS to complete
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
						struct multi_cs_completion *mcs_compl)
{ … }

/*
 * hl_multi_cs_completion_init - init array of multi-CS completion structures
 *
 * @hdev: pointer to habanalabs device structure
 */
void hl_multi_cs_completion_init(struct hl_device *hdev)
{ … }

/*
 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
 *
 * @hpriv: pointer to the private data of the fd
 * @data: pointer to multi-CS wait ioctl in/out args
 *
 */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{ … }

static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{ … }

static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
					struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
{ … }

static int validate_and_get_ts_record(struct device *dev,
					struct hl_ts_buff *ts_buff, u64 ts_offset,
					struct hl_user_pending_interrupt **req_event_record)
{ … }

static void unregister_timestamp_node(struct hl_device *hdev,
			struct hl_user_pending_interrupt *record, bool need_lock)
{ … }

static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
					struct wait_interrupt_data *data, unsigned long *flags,
					struct hl_user_pending_interrupt **pend)
{ … }

static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct wait_interrupt_data *data,
				u32 *status, u64 *timestamp)
{ … }

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct wait_interrupt_data *data,
				u32 *status, u64 *timestamp)
{ … }

static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 user_address,
				u64 target_value, struct hl_user_interrupt *interrupt,
				u32 *status,
				u64 *timestamp)
{ … }

static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{ … }

int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{ … }
linux/drivers/accel/habanalabs/common/command_submission.c