// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 */

#include <linux/spinlock.h>
#include <linux/seqlock.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/bitops.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include "hfi.h"
#include "common.h"
#include "qp.h"
#include "sdma.h"
#include "iowait.h"
#include "trace.h"

/* must be a power of 2 >= 64 <= 32768 */
#define SDMA_DESCQ_CNT …
#define SDMA_DESC_INTR …
#define INVALID_TAIL …
#define SDMA_PAD …

static uint sdma_descq_cnt = …;
module_param(sdma_descq_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(…) …;

static uint sdma_idle_cnt = …;
module_param(sdma_idle_cnt, uint, S_IRUGO);
MODULE_PARM_DESC(…) …;

uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(…) …;

static uint sdma_desct_intr = …;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(…) …;

#define SDMA_WAIT_BATCH_SIZE …

/* max wait time for a SDMA engine to indicate it has halted */
#define SDMA_ERR_HALT_TIMEOUT …

/* all SDMA engine errors that cause a halt */
#define SD(name) …
#define ALL_SDMA_ENG_HALT_ERRS …

/* sdma_sendctrl operations */
#define SDMA_SENDCTRL_OP_ENABLE …
#define SDMA_SENDCTRL_OP_INTENABLE …
#define SDMA_SENDCTRL_OP_HALT …
#define SDMA_SENDCTRL_OP_CLEANUP …

/* handle long defines */
#define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK …
#define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT …

static const char * const sdma_state_names[] = …;

#ifdef CONFIG_SDMA_VERBOSITY
static const char * const sdma_event_names[] = …;
#endif

static const struct sdma_set_state_action sdma_action_table[] = …;

#define SDMA_TAIL_UPDATE_THRESH …

/* declare all statics here rather than keep sorting */
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void __sdma_process_event(
	struct sdma_engine *sde,
	enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);

/**
 * sdma_state_name() - return state string from enum
 * @state: state
 */
static const char *sdma_state_name(enum sdma_states state)
{
	…
}

static void sdma_get(struct sdma_state *ss)
{
	…
}

static void sdma_complete(struct kref *kref)
{
	…
}

static void sdma_put(struct sdma_state *ss)
{
	…
}

static void sdma_finalput(struct sdma_state *ss)
{
	…
}

static inline void write_sde_csr(
	struct sdma_engine *sde,
	u32 offset0,
	u64 value)
{
	…
}

static inline u64 read_sde_csr(
	struct sdma_engine *sde,
	u32 offset0)
{
	…
}
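/*
 * Illustrative sketch, not necessarily the exact implementation: the
 * sdma_get()/sdma_put()/sdma_complete()/sdma_finalput() helpers above
 * follow the common kref-plus-completion teardown pattern, assuming
 * struct sdma_state embeds a kref and a completion (the field names
 * below are assumptions for illustration only):
 *
 *	sdma_get(ss):       kref_get(&ss->kref);
 *	sdma_complete(k):   complete(&container_of(k, struct sdma_state,
 *						    kref)->comp);
 *	sdma_put(ss):       kref_put(&ss->kref, sdma_complete);
 *	sdma_finalput(ss):  sdma_put(ss); wait_for_completion(&ss->comp);
 */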
/*
 * sdma_wait_for_packet_egress() - wait for the VL FIFO occupancy for
 * sdma engine 'sde' to drop to 0.
 */
static void sdma_wait_for_packet_egress(struct sdma_engine *sde, int pause)
{
	…
}

/*
 * sdma_wait() - wait for packet egress to complete for all SDMA engines,
 * and pause for credit return.
 */
void sdma_wait(struct hfi1_devdata *dd)
{
	…
}

static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
{
	…
}

static inline void complete_tx(struct sdma_engine *sde,
			       struct sdma_txreq *tx,
			       int res)
{
	…
}

/*
 * Complete all the sdma requests with an SDMA_TXREQ_S_ABORTED status
 *
 * Depending on timing there can be txreqs in two places:
 * - in the descq ring
 * - in the flush list
 *
 * To avoid ordering issues the descq ring needs to be flushed
 * first, followed by the flush list.
 *
 * This routine is called from two places
 * - From a work queue item
 * - Directly from the state machine just before setting the
 *   state to running
 *
 * Must be called with head_lock held
 *
 */
static void sdma_flush(struct sdma_engine *sde)
{
	…
}

/*
 * Fields a work request for flushing the descq ring
 * and the flush list
 *
 * If the engine has been brought to running during
 * the scheduling delay, the flush is ignored, assuming
 * that the process of bringing the engine to running
 * would have done this flush prior to going to running.
 *
 */
static void sdma_field_flush(struct work_struct *work)
{
	…
}

static void sdma_err_halt_wait(struct work_struct *work)
{
	…
}

static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
{
	…
}

static void sdma_err_progress_check(struct timer_list *t)
{
	…
}

static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
	…
}

static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
	…
}

/*
 * flush ring for recovery
 */
static void sdma_flush_descq(struct sdma_engine *sde)
{
	…
}

static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
	…
}

static void sdma_sw_tear_down(struct sdma_engine *sde)
{
	…
}

static void sdma_start_hw_clean_up(struct sdma_engine *sde)
{
	…
}

static void sdma_set_state(struct sdma_engine *sde,
			   enum sdma_states next_state)
{
	…
}

/**
 * sdma_get_descq_cnt() - called when device probed
 *
 * Return a validated descq count.
 *
 * This is currently only used in the verbs initialization to build the tx
 * list.
 *
 * This will probably be deleted in favor of a more scalable approach to
 * alloc tx's.
 *
 */
u16 sdma_get_descq_cnt(void)
{
	…
}

/**
 * sdma_engine_get_vl() - return vl for a given sdma engine
 * @sde: sdma engine
 *
 * This function returns the vl mapped to a given engine, or an error if
 * the mapping can't be found. The mapping fields are protected by RCU.
 */
int sdma_engine_get_vl(struct sdma_engine *sde)
{
	…
}

/**
 * sdma_select_engine_vl() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an engine based on the selector and a vl. The
 * mapping fields are protected by RCU.
 */
struct sdma_engine *sdma_select_engine_vl(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 vl)
{
	…
}
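/*
 * Usage sketch for the selector-based lookups above (hypothetical caller,
 * illustration only): the selector is just a spreading factor, so any
 * stable per-flow value works, e.g. a hash of the QP number:
 *
 *	u32 selector = jhash_1word(qp_num, 0);
 *	struct sdma_engine *sde = sdma_select_engine_vl(dd, selector, vl);
 *
 * Since the mapping fields are documented as RCU protected, the lookup
 * presumably takes rcu_read_lock() internally and callers need no extra
 * locking; qp_num, dd and vl above are caller-supplied values.
 */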
/**
 * sdma_select_engine_sc() - select sdma engine
 * @dd: devdata
 * @selector: a spreading factor
 * @sc5: the 5 bit sc
 *
 * This function returns an engine based on the selector and an sc.
 */
struct sdma_engine *sdma_select_engine_sc(
	struct hfi1_devdata *dd,
	u32 selector,
	u8 sc5)
{
	…
}

struct sdma_rht_map_elem {
	…
};

struct sdma_rht_node {
	…
};

#define NR_CPUS_HINT …

static const struct rhashtable_params sdma_rht_params = …;

/*
 * sdma_select_user_engine() - select sdma engine based on user setup
 * @dd: devdata
 * @selector: a spreading factor
 * @vl: this vl
 *
 * This function returns an sdma engine for a user sdma request.
 * User defined sdma engine affinity setting is honored when applicable,
 * otherwise system default sdma engine mapping is used. To ensure correct
 * ordering, the mapping from <selector, vl> to sde must remain unchanged.
 */
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
					    u32 selector, u8 vl)
{
	…
}

static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
{
	…
}

static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
				 struct sdma_engine *sde)
{
	…
}

/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask
 */
static DEFINE_MUTEX(process_to_sde_mutex);

ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
				size_t count)
{
	…
}

ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
{
	…
}

static void sdma_rht_free(void *ptr, void *arg)
{
	…
}

/**
 * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
 * @s: seq file
 * @dd: hfi1_devdata
 * @cpuid: cpu id
 *
 * This routine dumps the process to sde mappings per cpu
 */
void sdma_seqfile_dump_cpu_list(struct seq_file *s,
				struct hfi1_devdata *dd,
				unsigned long cpuid)
{
	…
}

/*
 * Free the indicated map struct
 */
static void sdma_map_free(struct sdma_vl_map *m)
{
	…
}

/*
 * Handle RCU callback
 */
static void sdma_map_rcu_callback(struct rcu_head *list)
{
	…
}

/**
 * sdma_map_init - called when # vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_engines: per vl engine mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_engines is used to specify a non-uniform vl/engine loading. NULL
 * implies auto computing the loading and giving each VL a uniform
 * distribution of engines per VL.
 *
 * The auto algorithm computes the sde_per_vl and the number of extra
 * engines. Any extra engines are added from the last VL on down.
 *
 * rcu locking is used here to control access to the mapping fields.
 *
 * If either num_vls or num_sdma is not a power of 2, the array sizes
 * in the struct sdma_vl_map and the struct sdma_map_elem are rounded
 * up to the next highest power of 2 and the first entry is reused
 * in a round robin fashion.
 *
 * If an error occurs the map change is not done and the mapping is
 * not changed.
 *
 */
int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
{
	…
}

/**
 * sdma_clean - Clean up allocated memory
 * @dd: struct hfi1_devdata
 * @num_engines: num sdma engines
 *
 * This routine can be called regardless of the success of
 * sdma_init()
 */
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
	…
}

/**
 * sdma_init() - called when device probed
 * @dd: hfi1_devdata
 * @port: port number (currently only zero)
 *
 * Initializes each sde and its csrs.
 * Interrupts are not required to be enabled.
 *
 * Returns:
 * 0 - success, -errno on failure
 */
int sdma_init(struct hfi1_devdata *dd, u8 port)
{
	…
}
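/*
 * Worked example for the sdma_map_init() auto distribution described
 * above (numbers are illustrative only): with num_sdma = 16 engines and
 * num_vls = 3,
 *
 *	sde_per_vl = num_sdma / num_vls;	16 / 3 = 5
 *	extra      = num_sdma % num_vls;	16 % 3 = 1
 *
 * and the extra engine is added from the last VL on down, giving per-VL
 * engine counts of { 5, 5, 6 }. Because num_vls = 3 is not a power of 2,
 * the map array would be sized to the next power of 2 (4 entries), with
 * the first entry reused for the extra slot per the round-robin reuse
 * noted in the kernel-doc above.
 */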
/**
 * sdma_all_running() - called when the link goes up
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the running state.
 */
void sdma_all_running(struct hfi1_devdata *dd)
{
	…
}

/**
 * sdma_all_idle() - called when the link goes down
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the idle state.
 */
void sdma_all_idle(struct hfi1_devdata *dd)
{
	…
}

/**
 * sdma_start() - called to kick off state processing for all engines
 * @dd: hfi1_devdata
 *
 * This routine is for kicking off the state processing for all required
 * sdma engines. Interrupts need to be working at this point.
 *
 */
void sdma_start(struct hfi1_devdata *dd)
{
	…
}

/**
 * sdma_exit() - used when module is removed
 * @dd: hfi1_devdata
 */
void sdma_exit(struct hfi1_devdata *dd)
{
	…
}

/*
 * unmap the indicated descriptor
 */
static inline void sdma_unmap_desc(
	struct hfi1_devdata *dd,
	struct sdma_desc *descp)
{
	…
}

/*
 * return the mode as indicated by the first
 * descriptor in the tx.
 */
static inline u8 ahg_mode(struct sdma_txreq *tx)
{
	…
}

/**
 * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
 * @dd: hfi1_devdata for unmapping
 * @tx: tx request to clean
 *
 * This is used in the progress routine to clean the tx or
 * by the ULP to toss an in-process tx build.
 *
 * The code can be called multiple times without issue.
 *
 */
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	…
}

static inline u16 sdma_gethead(struct sdma_engine *sde)
{
	…
}

/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with head_lock held.
 */
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
	…
}

/* head_lock must be held */
static void sdma_make_progress(struct sdma_engine *sde, u64 status)
{
	…
}

/*
 * sdma_engine_interrupt() - interrupt handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 *
 * Status is a mask of the 3 possible interrupts for this engine. It will
 * contain bits _only_ for this SDMA engine. It will contain at least one
 * bit; it may contain more.
 */
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
{
	…
}

/**
 * sdma_engine_error() - error handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 */
void sdma_engine_error(struct sdma_engine *sde, u64 status)
{
	…
}

static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
{
	…
}

static void sdma_setlengen(struct sdma_engine *sde)
{
	…
}

static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	…
}

/*
 * This is called when changing to state s10_hw_start_up_halt_wait as
 * a result of send buffer errors or send DMA descriptor errors.
 */
static void sdma_hw_start_up(struct sdma_engine *sde)
{
	…
}

/*
 * set_sdma_integrity
 *
 * Set the SEND_DMA_CHECK_ENABLE register for send DMA engine 'sde'.
 */
static void set_sdma_integrity(struct sdma_engine *sde)
{
	…
}

static void init_sdma_regs(
	struct sdma_engine *sde,
	u32 credits,
	uint idle_cnt)
{
	…
}

#ifdef CONFIG_SDMA_VERBOSITY

#define sdma_dumpstate_helper0(reg) …
#define sdma_dumpstate_helper(reg) …
#define sdma_dumpstate_helper2(reg) …

void sdma_dumpstate(struct sdma_engine *sde)
{
	…
}
#endif

static void dump_sdma_state(struct sdma_engine *sde)
{
	…
}

#define SDE_FMT …
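/*
 * Background sketch for the descriptor ring accounting above (generic
 * ring arithmetic, not necessarily the exact expression this driver
 * uses): with a power-of-2 descriptor count, the free space seen by the
 * submit side is typically
 *
 *	avail = (head - tail - 1) & (descq_cnt - 1);
 *
 * so one slot stays empty and head == tail unambiguously means the ring
 * is empty. sdma_desc_avail() above is called with head_lock held while
 * doing this kind of accounting; descq_cnt here stands for the ring size
 * and is an assumption used only for illustration.
 */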
/**
 * sdma_seqfile_dump_sde() - debugfs dump of sde
 * @s: seq file
 * @sde: send dma engine to dump
 *
 * This routine dumps the sde to the indicated seq file.
 */
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
{
	…
}

/*
 * add the generation number into
 * the qw1 and return
 */
static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
{
	…
}

/*
 * This routine submits the indicated tx
 *
 * Space has already been guaranteed and
 * tail side of ring is locked.
 *
 * The hardware tail update is done
 * in the caller and that is facilitated
 * by returning the new tail.
 *
 * There is special case logic for ahg
 * to not add the generation number for
 * up to 2 descriptors that follow the
 * first descriptor.
 *
 */
static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	…
}

/*
 * Check for progress
 */
static int sdma_check_progress(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *tx,
	bool pkts_sent)
{
	…
}

/**
 * sdma_send_txreq() - submit a tx req to ring
 * @sde: sdma engine to use
 * @wait: SE wait structure to use when full (may be NULL)
 * @tx: sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * The call submits the tx into the ring. If an iowait structure is non-NULL
 * the packet will be queued to the list in wait.
 *
 * Return:
 * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in
 * ring (wait == NULL)
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
 */
int sdma_send_txreq(struct sdma_engine *sde, struct iowait_work *wait,
		    struct sdma_txreq *tx, bool pkts_sent)
{
	…
}

/**
 * sdma_send_txlist() - submit a list of tx req to ring
 * @sde: sdma engine to use
 * @wait: SE wait structure to use when full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: pointer to a u16 which, after return, will contain the total
 *	       number of sdma_txreqs removed from the tx_list. This will
 *	       include sdma_txreqs whose SDMA descriptors are submitted to
 *	       the ring and the sdma_txreqs which are added to the SDMA
 *	       engine flush list if the SDMA engine state is not running.
 *
 * The call submits the list into the ring.
 *
 * If the iowait structure is non-NULL and not equal to the iowait list
 * the unprocessed part of the list will be appended to the list in wait.
 *
 * In all cases, the tx_list will be updated so the head of the tx_list is
 * the list of descriptors that have yet to be transmitted.
 *
 * The intent of this call is to provide a more efficient
 * way of submitting multiple packets to SDMA while holding the tail
 * side locking.
 *
 * Return:
 * 0 - Success,
 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
 */
int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
		     struct list_head *tx_list, u16 *count_out)
{
	…
}

static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
{
	…
}

static void __sdma_process_event(struct sdma_engine *sde,
				 enum sdma_events event)
{
	…
}

/*
 * _extend_sdma_tx_descs() - helper to extend txreq
 *
 * This is called once the initial nominal allocation
 * of descriptors in the sdma_txreq is exhausted.
 *
 * The code will bump the allocation up to the max
 * of MAX_DESC (64) descriptors. There doesn't seem to be
 * much point in an interim step. The last descriptor
 * is reserved for the coalesce buffer in order to support
 * cases where an input packet has >MAX_DESC iovecs.
 *
 */
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	…
}
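/*
 * Caller-side sketch for the return contract documented above for
 * sdma_send_txreq() (hypothetical caller, illustration only):
 *
 *	ret = sdma_send_txreq(sde, wait, tx, pkts_sent);
 *
 *	0            - descriptors are in the ring; nothing more to do
 *	-EIOCBQUEUED - tx was queued on the iowait for a later retry
 *	-EBUSY       - no ring space and wait == NULL; caller must retry
 *	-ECOMM       - engine is in a bad sdma state
 *	-EINVAL      - the sdma_txreq was incomplete
 *
 * sdma_send_txlist() follows the same contract for a whole list, with
 * *count_out reporting how many txreqs were consumed from tx_list.
 */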
/*
 * ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
 *
 * This is called once the initial nominal allocation of descriptors
 * in the sdma_txreq is exhausted.
 *
 * This function calls _extend_sdma_tx_descs to extend or allocate
 * a coalesce buffer. If there is an allocated coalesce buffer, it will
 * copy the input packet data into the coalesce buffer. It also adds
 * the coalesce buffer descriptor once the whole packet is received.
 *
 * Return:
 * <0 - error
 * 0 - coalescing, don't populate descriptor
 * 1 - continue with populating descriptor
 */
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
			   int type, void *kvaddr, struct page *page,
			   unsigned long offset, u16 len)
{
	…
}

/* Update sdes when the lmc changes */
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
{
	…
}

/* tx not dword sized - pad */
int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	…
}

/*
 * Add ahg to the sdma_txreq
 *
 * The logic will consume up to 3
 * descriptors at the beginning of
 * sdma_txreq.
 */
void _sdma_txreq_ahgadd(
	struct sdma_txreq *tx,
	u8 num_ahg,
	u8 ahg_entry,
	u32 *ahg,
	u8 ahg_hlen)
{
	…
}

/**
 * sdma_ahg_alloc - allocate an AHG entry
 * @sde: engine to allocate from
 *
 * Return:
 * 0-31 when successful, -EOPNOTSUPP if AHG is not enabled,
 * -ENOSPC if an entry is not available
 */
int sdma_ahg_alloc(struct sdma_engine *sde)
{
	…
}

/**
 * sdma_ahg_free - free an AHG entry
 * @sde: engine to return AHG entry
 * @ahg_index: index to free
 *
 * This routine frees the indicated AHG entry.
 */
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
{
	…
}

/*
 * SPC freeze handling for SDMA engines. Called when the driver knows
 * the SPC is going into a freeze but before the freeze is fully
 * settled. Generally an error interrupt.
 *
 * This event will pull the engine out of running so no more entries can be
 * added to the engine's queue.
 */
void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
{
	…
}

/*
 * SPC freeze handling for SDMA engines. Called when the driver knows
 * the SPC is fully frozen.
 */
void sdma_freeze(struct hfi1_devdata *dd)
{
	…
}

/*
 * SPC freeze handling for the SDMA engines. Called after the SPC is unfrozen.
 *
 * The SPC freeze acts like an SDMA halt and a hardware clean combined. All
 * that is left is a software clean. We could do it after the SPC is fully
 * frozen, but then we'd have to add another state to wait for the unfreeze.
 * Instead, just defer the software clean until the unfreeze step.
 */
void sdma_unfreeze(struct hfi1_devdata *dd)
{
	…
}

/**
 * _sdma_engine_progress_schedule() - schedule progress on engine
 * @sde: sdma_engine to schedule progress
 *
 */
void _sdma_engine_progress_schedule(
	struct sdma_engine *sde)
{
	…
}
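/*
 * Usage sketch for the AHG allocation API above (hypothetical caller,
 * illustration only), based on the documented return values:
 *
 *	int ahg_index = sdma_ahg_alloc(sde);
 *
 *	if (ahg_index < 0)
 *		return ahg_index;	// -EOPNOTSUPP or -ENOSPC
 *	// ... use the 0-31 entry, presumably as the ahg_entry argument
 *	// to _sdma_txreq_ahgadd(), then release it when the flow ends:
 *	sdma_ahg_free(sde, ahg_index);
 */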