// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. */ /* The driver transmit and receive code */ #include <linux/mm.h> #include <linux/netdevice.h> #include <linux/prefetch.h> #include <linux/bpf_trace.h> #include <net/dsfield.h> #include <net/mpls.h> #include <net/xdp.h> #include "ice_txrx_lib.h" #include "ice_lib.h" #include "ice.h" #include "ice_trace.h" #include "ice_dcb_lib.h" #include "ice_xsk.h" #include "ice_eswitch.h" #define ICE_RX_HDR_SIZE … #define FDIR_DESC_RXDID … #define ICE_FDIR_CLEAN_DELAY … /** * ice_prgm_fdir_fltr - Program a Flow Director filter * @vsi: VSI to send dummy packet * @fdir_desc: flow director descriptor * @raw_packet: allocated buffer for flow director */ int ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, u8 *raw_packet) { … } /** * ice_unmap_and_free_tx_buf - Release a Tx buffer * @ring: the ring that owns the buffer * @tx_buf: the buffer to free */ static void ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { … } static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring) { … } /** * ice_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned */ void ice_clean_tx_ring(struct ice_tx_ring *tx_ring) { … } /** * ice_free_tx_ring - Free Tx resources per queue * @tx_ring: Tx descriptor ring for a specific queue * * Free all transmit software resources */ void ice_free_tx_ring(struct ice_tx_ring *tx_ring) { … } /** * ice_clean_tx_irq - Reclaim resources after transmit completes * @tx_ring: Tx ring to clean * @napi_budget: Used to determine if we are in netpoll * * Returns true if there's any budget left (i.e.
the clean is finished) */ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) { … } /** * ice_setup_tx_ring - Allocate the Tx descriptors * @tx_ring: the Tx ring to set up * * Return 0 on success, negative on error */ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) { … } /** * ice_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned */ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { … } /** * ice_free_rx_ring - Free Rx resources * @rx_ring: ring to clean the resources from * * Free all receive software resources */ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) { … } /** * ice_setup_rx_ring - Allocate the Rx descriptors * @rx_ring: the Rx ring to set up * * Return 0 on success, negative on error */ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) { … } /** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * The function itself returns nothing: the resulting action, any of * ICE_XDP_{PASS, CONSUMED, TX, REDIR}, is recorded in @rx_buf */ static void ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) { … } /** * ice_xmit_xdp_ring - submit frame to XDP ring for transmission * @xdpf: XDP frame that will be converted to XDP buff * @xdp_ring: XDP ring for transmission */ static int ice_xmit_xdp_ring(const struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring) { … } /** * ice_xdp_xmit - submit packets to XDP ring for transmission * @dev: netdev * @n: number of XDP frames to be transmitted * @frames: XDP frames to be transmitted * @flags: transmit flags * * Returns number of frames successfully sent. Failed frames * will be freed by XDP core.
* For error cases, a negative errno code is returned and no frames * are transmitted (caller must handle freeing frames). */ int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { … } /** * ice_alloc_mapped_page - recycle or make a new page * @rx_ring: ring to use * @bi: rx_buf struct to modify * * Returns true if the page was successfully allocated or * reused. */ static bool ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) { … } /** * ice_alloc_rx_bufs - Replace used receive buffers * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace * * Returns false if all allocations were successful, true if any fail. Returning * true signals to the caller that we didn't replace cleaned_count buffers and * there is more work to do. * * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx * buffers. Then bump tail at most one time. Grouping like this lets us avoid * multiple tail writes per call. */ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) { … } /** * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse * @rx_buf: Rx buffer to adjust * @size: Size of adjustment * * Update the offset within page so that Rx buf will be ready to be reused.
* For systems with PAGE_SIZE < 8192 this function will flip the page offset * so the second half of page assigned to Rx buffer will be used, otherwise * the offset is moved by "size" bytes */ static void ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) { … } /** * ice_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buf: buffer containing the page * * If page is reusable, we have a green light for calling ice_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and * page freed */ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) { … } /** * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp buff to place the data into * @rx_buf: buffer containing page to add * @size: packet length from rx_desc * * This function will add the data contained in rx_buf->page to the xdp buf. * It will just attach the page as a frag. */ static int ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct ice_rx_buf *rx_buf, const unsigned int size) { … } /** * ice_reuse_rx_page - page flip buffer and store it back on the ring * @rx_ring: Rx descriptor ring to store buffers on * @old_buf: donor buffer to have page reused * * Synchronizes page for reuse by the adapter */ static void ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) { … } /** * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use * @rx_ring: Rx descriptor ring to transact packets on * @size: size of buffer to add to skb * @ntc: index of next to clean element * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. 
*/ static struct ice_rx_buf * ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, const unsigned int ntc) { … } /** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data * * This function builds an skb around an existing XDP buffer, taking care * to set up the skb correctly and avoid any memcpy overhead. Driver has * already combined frags (if any) to skb_shared_info. */ static struct sk_buff * ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { … } /** * ice_construct_skb - Allocate skb and populate it * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data * * This function allocates an skb. It then populates it with the page * data from the current receive descriptor, taking care to set up the * skb correctly. */ static struct sk_buff * ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { … } /** * ice_put_rx_buf - Clean up used buffer and either recycle or free * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from * * This function will clean up the contents of the rx_buf. It will either * recycle the buffer or unmap it and free the associated resources. */ static void ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) { … } /** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process * * This function provides a "bounce buffer" approach to Rx interrupt * processing. The advantage to this is that on systems that have * expensive overhead for IOMMU access this provides a means of avoiding * it by maintaining the mapping of the page to the system. 
* * Returns amount of work completed */ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { … } static void __ice_update_sample(struct ice_q_vector *q_vector, struct ice_ring_container *rc, struct dim_sample *sample, bool is_tx) { … } /** * ice_net_dim - Update net DIM algorithm * @q_vector: the vector associated with the interrupt * * Create a DIM sample and notify net_dim() so that it can possibly decide * a new ITR value based on incoming packets, bytes, and interrupts. * * This function is a no-op if the ring is not configured to dynamic ITR. */ static void ice_net_dim(struct ice_q_vector *q_vector) { … } /** * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register * @itr_idx: interrupt throttling index * @itr: interrupt throttling value in usecs */ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) { … } /** * ice_enable_interrupt - re-enable MSI-X interrupt * @q_vector: the vector associated with the interrupt to enable * * If the VSI is down, the interrupt will not be re-enabled. Also, * when enabling the interrupt always reset the wb_on_itr to false * and trigger a software interrupt to clean out internal state. */ static void ice_enable_interrupt(struct ice_q_vector *q_vector) { … } /** * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector * @q_vector: q_vector to set WB_ON_ITR on * * We need to tell hardware to write-back completed descriptors even when * interrupts are disabled. Descriptors will be written back on cache line * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR * descriptors may not be written back if they don't fill a cache line until * the next interrupt. * * This sets the write-back frequency to whatever was set previously for the * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we * aren't meddling with the INTENA_M bit. 
*/ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) { … } /** * ice_napi_poll - NAPI polling Rx/Tx cleanup routine * @napi: napi struct with our devices info in it * @budget: amount of work driver is allowed to do this pass, in packets * * This function will clean all queues associated with a q_vector. * * Returns the amount of work done */ int ice_napi_poll(struct napi_struct *napi, int budget) { … } /** * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions * @tx_ring: the ring to be checked * @size: the size buffer we want to assure is available * * Returns -EBUSY if a stop is needed, else 0 */ static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) { … } /** * ice_maybe_stop_tx - 1st level check for Tx stop conditions * @tx_ring: the ring to be checked * @size: the size buffer we want to assure is available * * Returns 0 if stop is not needed */ static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) { … } /** * ice_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on * @first: first buffer info buffer to use * @off: pointer to struct that holds offload parameters * * This function loops over the skb data pointed to by *first * and gets a physical address for each memory location and programs * it and the length into the transmit descriptor. */ static void ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, struct ice_tx_offload_params *off) { … } /** * ice_tx_csum - Enable Tx checksum offloads * @first: pointer to the first descriptor * @off: pointer to struct that holds offload parameters * * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise. 
*/ static int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) { … } /** * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW * @tx_ring: ring to send buffer on * @first: pointer to struct ice_tx_buf * * Checks the skb and set up correspondingly several generic transmit flags * related to VLAN tagging for the HW, such as VLAN, DCB, etc. */ static void ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) { … } /** * ice_tso - computes mss and TSO length to prepare for TSO * @first: pointer to struct ice_tx_buf * @off: pointer to struct that holds offload parameters * * Returns 0 or error (negative) if TSO can't happen, 1 otherwise. */ static int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) { … } /** * ice_txd_use_count - estimate the number of descriptors needed for Tx * @size: transmit request size in bytes * * Due to hardware alignment restrictions (4K alignment), we need to * assume that we can have no more than 12K of data per descriptor, even * though each descriptor can take up to 16K - 1 bytes of aligned memory. * Thus, we need to divide by 12K. But division is slow! Instead, * we decompose the operation into shifts and one relatively cheap * multiply operation. * * To divide by 12K, we first divide by 4K, then divide by 3: * To divide by 4K, shift right by 12 bits * To divide by 3, multiply by 85, then divide by 256 * (Divide by 256 is done by shifting right by 8 bits) * Finally, we add one to round up. Because 256 isn't an exact multiple of * 3, we'll underestimate near each multiple of 12K. This is actually more * accurate as we have 4K - 1 of wiggle room that we can fit into the last * segment. For our purposes this is accurate out to 1M which is orders of * magnitude greater than our largest possible GSO size. 
* * This would then be implemented as: * return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR; * * Since multiplication and division are commutative, we can reorder * operations into: * return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR; */ static unsigned int ice_txd_use_count(unsigned int size) { … } /** * ice_xmit_desc_count - calculate number of Tx descriptors needed * @skb: send buffer * * Returns number of data descriptors needed for this skb. */ static unsigned int ice_xmit_desc_count(struct sk_buff *skb) { … } /** * __ice_chk_linearize - Check if there are more than 8 buffers per packet * @skb: send buffer * * Note: This HW can't DMA more than 8 buffers to build a packet on the wire * and so we need to figure out the cases where we need to linearize the skb. * * For TSO we need to count the TSO header and segment payload separately. * As such we need to check cases where we have 7 fragments or more as we * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for * the segment payload in the first descriptor, and another 7 for the * fragments. */ static bool __ice_chk_linearize(struct sk_buff *skb) { … } /** * ice_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer * @count: number of buffers used * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. 
*/ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) { … } /** * ice_tstamp - set up context descriptor for hardware timestamp * @tx_ring: pointer to the Tx ring to send buffer on * @skb: pointer to the SKB we're sending * @first: Tx buffer * @off: Tx offload parameters */ static void ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb, struct ice_tx_buf *first, struct ice_tx_offload_params *off) { … } /** * ice_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer * @tx_ring: ring to send buffer on * * Returns NETDEV_TX_OK if sent, else an error code */ static netdev_tx_t ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) { … } /** * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer * @skb: send buffer * @netdev: network interface device structure * * Returns NETDEV_TX_OK if sent, else an error code */ netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) { … } /** * ice_get_dscp_up - return the UP/TC value for a SKB * @dcbcfg: DCB config that contains DSCP to UP/TC mapping * @skb: SKB to query for info to determine UP/TC * * This function is to only be called when the PF is in L3 DSCP PFC mode */ static u8 ice_get_dscp_up(struct ice_dcbx_cfg *dcbcfg, struct sk_buff *skb) { … } u16 ice_select_queue(struct net_device *netdev, struct sk_buff *skb, struct net_device *sb_dev) { … } /** * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue * @tx_ring: tx_ring to clean */ void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring) { … }