ice_txrx.c | Explore in Territory

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */

/* The driver transmit and receive code */

#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/prefetch.h>
#include <linux/bpf_trace.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/xdp.h>
#include "ice_txrx_lib.h"
#include "ice_lib.h"
#include "ice.h"
#include "ice_trace.h"
#include "ice_dcb_lib.h"
#include "ice_xsk.h"
#include "ice_eswitch.h"

#define ICE_RX_HDR_SIZE …

#define FDIR_DESC_RXDID …
#define ICE_FDIR_CLEAN_DELAY …

/**
 * ice_prgm_fdir_fltr - Program a Flow Director filter
 * @vsi: VSI to send dummy packet
 * @fdir_desc: flow director descriptor
 * @raw_packet: allocated buffer for flow director
 */
int
ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
		   u8 *raw_packet)
{ … }

/**
 * ice_unmap_and_free_tx_buf - Release a Tx buffer
 * @ring: the ring that owns the buffer
 * @tx_buf: the buffer to free
 */
static void
ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
{ … }

static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring)
{ … }

/**
 * ice_clean_tx_ring - Free any empty Tx buffers
 * @tx_ring: ring to be cleaned
 */
void ice_clean_tx_ring(struct ice_tx_ring *tx_ring)
{ … }

/**
 * ice_free_tx_ring - Free Tx resources per queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 */
void ice_free_tx_ring(struct ice_tx_ring *tx_ring)
{ … }

/**
 * ice_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring: Tx ring to clean
 * @napi_budget: Used to determine if we are in netpoll
 *
 * Returns true if there's any budget left (e.g. the clean is finished)
 */
static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget)
{ … }

/**
 * ice_setup_tx_ring - Allocate the Tx descriptors
 * @tx_ring: the Tx ring to set up
 *
 * Return 0 on success, negative on error
 */
int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
{ … }

/**
 * ice_clean_rx_ring - Free Rx buffers
 * @rx_ring: ring to be cleaned
 */
void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
{ … }

/**
 * ice_free_rx_ring - Free Rx resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 */
void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
{ … }

/**
 * ice_setup_rx_ring - Allocate the Rx descriptors
 * @rx_ring: the Rx ring to set up
 *
 * Return 0 on success, negative on error
 */
int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
{ … }

/**
 * ice_run_xdp - Executes an XDP program on initialized xdp_buff
 * @rx_ring: Rx ring
 * @xdp: xdp_buff used as input to the XDP program
 * @xdp_prog: XDP program to run
 * @xdp_ring: ring to be used for XDP_TX action
 * @rx_buf: Rx buffer to store the XDP action
 * @eop_desc: Last descriptor in packet to read metadata from
 *
 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
 */
static void
ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
	    struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
{ … }

/**
 * ice_xmit_xdp_ring - submit frame to XDP ring for transmission
 * @xdpf: XDP frame that will be converted to XDP buff
 * @xdp_ring: XDP ring for transmission
 */
static int ice_xmit_xdp_ring(const struct xdp_frame *xdpf,
			     struct ice_tx_ring *xdp_ring)
{ … }

/**
 * ice_xdp_xmit - submit packets to XDP ring for transmission
 * @dev: netdev
 * @n: number of XDP frames to be transmitted
 * @frames: XDP frames to be transmitted
 * @flags: transmit flags
 *
 * Returns number of frames successfully sent. Failed frames
 * will be free'ed by XDP core.
 * For error cases, a negative errno code is returned and no-frames
 * are transmitted (caller must handle freeing frames).
 */
int
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
	     u32 flags)
{ … }

/**
 * ice_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @bi: rx_buf struct to modify
 *
 * Returns true if the page was successfully allocated or
 * reused.
 */
static bool
ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
{ … }

/**
 * ice_alloc_rx_bufs - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Returns false if all allocations were successful, true if any fail. Returning
 * true signals to the caller that we didn't replace cleaned_count buffers and
 * there is more work to do.
 *
 * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
 * buffers. Then bump tail at most one time. Grouping like this lets us avoid
 * multiple tail writes per call.
 */
bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
{ … }

/**
 * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
 * @rx_buf: Rx buffer to adjust
 * @size: Size of adjustment
 *
 * Update the offset within page so that Rx buf will be ready to be reused.
 * For systems with PAGE_SIZE < 8192 this function will flip the page offset
 * so the second half of page assigned to Rx buffer will be used, otherwise
 * the offset is moved by "size" bytes
 */
static void
ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
{ … }

/**
 * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
 * @rx_buf: buffer containing the page
 *
 * If page is reusable, we have a green light for calling ice_reuse_rx_page,
 * which will assign the current buffer to the buffer that next_to_alloc is
 * pointing to; otherwise, the DMA mapping needs to be destroyed and
 * page freed
 */
static bool
ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{ … }

/**
 * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp: xdp buff to place the data into
 * @rx_buf: buffer containing page to add
 * @size: packet length from rx_desc
 *
 * This function will add the data contained in rx_buf->page to the xdp buf.
 * It will just attach the page as a frag.
 */
static int
ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
		 struct ice_rx_buf *rx_buf, const unsigned int size)
{ … }

/**
 * ice_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: Rx descriptor ring to store buffers on
 * @old_buf: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the adapter
 */
static void
ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
{ … }

/**
 * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
 * @rx_ring: Rx descriptor ring to transact packets on
 * @size: size of buffer to add to skb
 * @ntc: index of next to clean element
 *
 * This function will pull an Rx buffer from the ring and synchronize it
 * for use by the CPU.
 */
static struct ice_rx_buf *
ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
	       const unsigned int ntc)
{ … }

/**
 * ice_build_skb - Build skb around an existing buffer
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 *
 * This function builds an skb around an existing XDP buffer, taking care
 * to set up the skb correctly and avoid any memcpy overhead. Driver has
 * already combined frags (if any) to skb_shared_info.
 */
static struct sk_buff *
ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{ … }

/**
 * ice_construct_skb - Allocate skb and populate it
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 *
 * This function allocates an skb. It then populates it with the page
 * data from the current receive descriptor, taking care to set up the
 * skb correctly.
 */
static struct sk_buff *
ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{ … }

/**
 * ice_put_rx_buf - Clean up used buffer and either recycle or free
 * @rx_ring: Rx descriptor ring to transact packets on
 * @rx_buf: Rx buffer to pull data from
 *
 * This function will clean up the contents of the rx_buf. It will either
 * recycle the buffer or unmap it and free the associated resources.
 */
static void
ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
{ … }

/**
 * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
 * @rx_ring: Rx descriptor ring to transact packets on
 * @budget: Total limit on number of packets to process
 *
 * This function provides a "bounce buffer" approach to Rx interrupt
 * processing. The advantage to this is that on systems that have
 * expensive overhead for IOMMU access this provides a means of avoiding
 * it by maintaining the mapping of the page to the system.
 *
 * Returns amount of work completed
 */
int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
{ … }

static void __ice_update_sample(struct ice_q_vector *q_vector,
				struct ice_ring_container *rc,
				struct dim_sample *sample,
				bool is_tx)
{ … }

/**
 * ice_net_dim - Update net DIM algorithm
 * @q_vector: the vector associated with the interrupt
 *
 * Create a DIM sample and notify net_dim() so that it can possibly decide
 * a new ITR value based on incoming packets, bytes, and interrupts.
 *
 * This function is a no-op if the ring is not configured to dynamic ITR.
 */
static void ice_net_dim(struct ice_q_vector *q_vector)
{ … }

/**
 * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
 * @itr_idx: interrupt throttling index
 * @itr: interrupt throttling value in usecs
 */
static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
{ … }

/**
 * ice_enable_interrupt - re-enable MSI-X interrupt
 * @q_vector: the vector associated with the interrupt to enable
 *
 * If the VSI is down, the interrupt will not be re-enabled. Also,
 * when enabling the interrupt always reset the wb_on_itr to false
 * and trigger a software interrupt to clean out internal state.
 */
static void ice_enable_interrupt(struct ice_q_vector *q_vector)
{ … }

/**
 * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
 * @q_vector: q_vector to set WB_ON_ITR on
 *
 * We need to tell hardware to write-back completed descriptors even when
 * interrupts are disabled. Descriptors will be written back on cache line
 * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
 * descriptors may not be written back if they don't fill a cache line until
 * the next interrupt.
 *
 * This sets the write-back frequency to whatever was set previously for the
 * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
 * aren't meddling with the INTENA_M bit.
 */
static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
{ … }

/**
 * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
 * @napi: napi struct with our devices info in it
 * @budget: amount of work driver is allowed to do this pass, in packets
 *
 * This function will clean all queues associated with a q_vector.
 *
 * Returns the amount of work done
 */
int ice_napi_poll(struct napi_struct *napi, int budget)
{ … }

/**
 * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
 * @tx_ring: the ring to be checked
 * @size: the size buffer we want to assure is available
 *
 * Returns -EBUSY if a stop is needed, else 0
 */
static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
{ … }

/**
 * ice_maybe_stop_tx - 1st level check for Tx stop conditions
 * @tx_ring: the ring to be checked
 * @size:    the size buffer we want to assure is available
 *
 * Returns 0 if stop is not needed
 */
static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
{ … }

/**
 * ice_tx_map - Build the Tx descriptor
 * @tx_ring: ring to send buffer on
 * @first: first buffer info buffer to use
 * @off: pointer to struct that holds offload parameters
 *
 * This function loops over the skb data pointed to by *first
 * and gets a physical address for each memory location and programs
 * it and the length into the transmit descriptor.
 */
static void
ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
	   struct ice_tx_offload_params *off)
{ … }

/**
 * ice_tx_csum - Enable Tx checksum offloads
 * @first: pointer to the first descriptor
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
 */
static
int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
{ … }

/**
 * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
 * @tx_ring: ring to send buffer on
 * @first: pointer to struct ice_tx_buf
 *
 * Checks the skb and set up correspondingly several generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 */
static void
ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first)
{ … }

/**
 * ice_tso - computes mss and TSO length to prepare for TSO
 * @first: pointer to struct ice_tx_buf
 * @off: pointer to struct that holds offload parameters
 *
 * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
 */
static
int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
{ … }

/**
 * ice_txd_use_count  - estimate the number of descriptors needed for Tx
 * @size: transmit request size in bytes
 *
 * Due to hardware alignment restrictions (4K alignment), we need to
 * assume that we can have no more than 12K of data per descriptor, even
 * though each descriptor can take up to 16K - 1 bytes of aligned memory.
 * Thus, we need to divide by 12K. But division is slow! Instead,
 * we decompose the operation into shifts and one relatively cheap
 * multiply operation.
 *
 * To divide by 12K, we first divide by 4K, then divide by 3:
 *     To divide by 4K, shift right by 12 bits
 *     To divide by 3, multiply by 85, then divide by 256
 *     (Divide by 256 is done by shifting right by 8 bits)
 * Finally, we add one to round up. Because 256 isn't an exact multiple of
 * 3, we'll underestimate near each multiple of 12K. This is actually more
 * accurate as we have 4K - 1 of wiggle room that we can fit into the last
 * segment. For our purposes this is accurate out to 1M which is orders of
 * magnitude greater than our largest possible GSO size.
 *
 * This would then be implemented as:
 *     return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
 *
 * Since multiplication and division are commutative, we can reorder
 * operations into:
 *     return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
 */
static unsigned int ice_txd_use_count(unsigned int size)
{ … }

/**
 * ice_xmit_desc_count - calculate number of Tx descriptors needed
 * @skb: send buffer
 *
 * Returns number of data descriptors needed for this skb.
 */
static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
{ … }

/**
 * __ice_chk_linearize - Check if there are more than 8 buffers per packet
 * @skb: send buffer
 *
 * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
 * and so we need to figure out the cases where we need to linearize the skb.
 *
 * For TSO we need to count the TSO header and segment payload separately.
 * As such we need to check cases where we have 7 fragments or more as we
 * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
 * the segment payload in the first descriptor, and another 7 for the
 * fragments.
 */
static bool __ice_chk_linearize(struct sk_buff *skb)
{ … }

/**
 * ice_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb:      send buffer
 * @count:    number of buffers used
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 */
static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
{ … }

/**
 * ice_tstamp - set up context descriptor for hardware timestamp
 * @tx_ring: pointer to the Tx ring to send buffer on
 * @skb: pointer to the SKB we're sending
 * @first: Tx buffer
 * @off: Tx offload parameters
 */
static void
ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
	   struct ice_tx_buf *first, struct ice_tx_offload_params *off)
{ … }

/**
 * ice_xmit_frame_ring - Sends buffer on Tx ring
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
static netdev_tx_t
ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
{ … }

/**
 * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
 * @skb: send buffer
 * @netdev: network interface device structure
 *
 * Returns NETDEV_TX_OK if sent, else an error code
 */
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{ … }

/**
 * ice_get_dscp_up - return the UP/TC value for a SKB
 * @dcbcfg: DCB config that contains DSCP to UP/TC mapping
 * @skb: SKB to query for info to determine UP/TC
 *
 * This function is to only be called when the PF is in L3 DSCP PFC mode
 */
static u8 ice_get_dscp_up(struct ice_dcbx_cfg *dcbcfg, struct sk_buff *skb)
{ … }

u16
ice_select_queue(struct net_device *netdev, struct sk_buff *skb,
		 struct net_device *sb_dev)
{ … }

/**
 * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
 * @tx_ring: tx_ring to clean
 */
void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring)
{ … }
linux/drivers/net/ethernet/intel/ice/ice_txrx.c