/*
 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
 * driver for Linux.
 *
 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>

#include "t4vf_common.h"
#include "t4vf_defs.h"

#include "../cxgb4/t4_regs.h"
#include "../cxgb4/t4_values.h"
#include "../cxgb4/t4fw_api.h"
#include "../cxgb4/t4_msg.h"

/*
 * Constants ...
 */
enum { … };

/*
 * Software state per TX descriptor.
 */
struct tx_sw_desc { … };

/*
 * Software state per RX Free List descriptor.  We keep track of the allocated
 * FL page, its size, and its PCI DMA address (if the page is mapped).  The FL
 * page size and its PCI DMA mapped state are stored in the low bits of the
 * PCI DMA address as per below.
 */
struct rx_sw_desc { … };

/*
 * The low bits of rx_sw_desc.dma_addr have special meaning.  Note that the
 * SGE also uses the low 4 bits to determine the size of the buffer.  It uses
 * those bits to index into the SGE_FL_BUFFER_SIZE[index] register array.
 * Since we only use SGE_FL_BUFFER_SIZE0 and SGE_FL_BUFFER_SIZE1, these low 4
 * bits can only contain a 0 or a 1 to indicate which size buffer we're giving
 * to the SGE.  Thus, our software state of "is the buffer mapped for DMA" is
 * maintained in an inverse sense so the hardware never sees that bit high.
 */
enum { … };

/**
 * get_buf_addr - return DMA buffer address of software descriptor
 * @sdesc: pointer to the software buffer descriptor
 *
 * Return the DMA buffer address of a software descriptor (stripping out
 * our low-order flag bits).
 */
static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *sdesc)
{ … }

/**
 * is_buf_mapped - is buffer mapped for DMA?
 * @sdesc: pointer to the software buffer descriptor
 *
 * Determine whether the buffer associated with a software descriptor is
 * mapped for DMA or not.
 */
static inline bool is_buf_mapped(const struct rx_sw_desc *sdesc)
{ … }
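/*
 * Editor's illustrative sketch -- not part of the driver.  Given the flag
 * layout described above (the low bits of rx_sw_desc.dma_addr select the
 * SGE_FL_BUFFER_SIZE[] entry, with a further low bit recording "not mapped
 * for DMA" in an inverted sense), splitting a descriptor's DMA address into
 * its components might look like the helpers below.  The EXAMPLE_* values
 * and helper names are hypothetical, not the driver's definitions.
 */
enum {
	EXAMPLE_RX_LARGE_BUF	= 1 << 0,	/* 0/1: SGE_FL_BUFFER_SIZE[] index */
	EXAMPLE_RX_UNMAPPED_BUF	= 1 << 1,	/* buffer is NOT mapped for DMA */
	EXAMPLE_RX_BUF_FLAGS	= EXAMPLE_RX_LARGE_BUF | EXAMPLE_RX_UNMAPPED_BUF,
};

static inline dma_addr_t example_strip_buf_flags(dma_addr_t dma_addr)
{
	/* Recover the bus address by clearing our software flag bits. */
	return dma_addr & ~(dma_addr_t)EXAMPLE_RX_BUF_FLAGS;
}

static inline bool example_buf_is_mapped(dma_addr_t dma_addr)
{
	/* "Unmapped" is kept inverted so the hardware never sees it set. */
	return !(dma_addr & EXAMPLE_RX_UNMAPPED_BUF);
}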
/**
 * need_skb_unmap - does the platform need unmapping of sk_buffs?
 *
 * Returns true if the platform needs sk_buff unmapping.  Since this is a
 * compile-time constant, the compiler optimizes away the unmapping code
 * when it returns false.
 */
static inline int need_skb_unmap(void)
{ … }

/**
 * txq_avail - return the number of available slots in a TX queue
 * @tq: the TX queue
 *
 * Returns the number of available descriptors in a TX queue.
 */
static inline unsigned int txq_avail(const struct sge_txq *tq)
{ … }

/**
 * fl_cap - return the capacity of a Free List
 * @fl: the Free List
 *
 * Returns the capacity of a Free List.  The capacity is less than the
 * size because an Egress Queue Index Unit worth of descriptors needs to
 * be left unpopulated, otherwise the Producer and Consumer indices PIDX
 * and CIDX will match and the hardware will think the FL is empty.
 */
static inline unsigned int fl_cap(const struct sge_fl *fl)
{ … }

/**
 * fl_starving - return whether a Free List is starving.
 * @adapter: pointer to the adapter
 * @fl: the Free List
 *
 * Tests the specified Free List to see whether the number of buffers
 * available to the hardware has fallen below our "starvation"
 * threshold.
 */
static inline bool fl_starving(const struct adapter *adapter,
			       const struct sge_fl *fl)
{ … }

/**
 * map_skb - map an skb for DMA to the device
 * @dev: the egress net device
 * @skb: the packet to map
 * @addr: a pointer to the base of the DMA mapping array
 *
 * Map an skb for DMA to the device and return an array of DMA addresses.
 */
static int map_skb(struct device *dev, const struct sk_buff *skb,
		   dma_addr_t *addr)
{ … }

static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
		      const struct ulptx_sgl *sgl, const struct sge_txq *tq)
{ … }

/**
 * free_tx_desc - reclaims TX descriptors and their buffers
 * @adapter: the adapter
 * @tq: the TX queue to reclaim descriptors from
 * @n: the number of descriptors to reclaim
 * @unmap: whether the buffers should be unmapped for DMA
 *
 * Reclaims TX descriptors from an SGE TX queue and frees the associated
 * TX buffers.  Called with the TX queue lock held.
 */
static void free_tx_desc(struct adapter *adapter, struct sge_txq *tq,
			 unsigned int n, bool unmap)
{ … }

/*
 * Return the number of reclaimable descriptors in a TX queue.
 */
static inline int reclaimable(const struct sge_txq *tq)
{ … }

/**
 * reclaim_completed_tx - reclaims completed TX descriptors
 * @adapter: the adapter
 * @tq: the TX queue to reclaim completed descriptors from
 * @unmap: whether the buffers should be unmapped for DMA
 *
 * Reclaims TX descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the TX
 * queue locked.
 */
static inline void reclaim_completed_tx(struct adapter *adapter,
					struct sge_txq *tq,
					bool unmap)
{ … }

/**
 * get_buf_size - return the size of an RX Free List buffer.
 * @adapter: pointer to the associated adapter
 * @sdesc: pointer to the software buffer descriptor
 */
static inline int get_buf_size(const struct adapter *adapter,
			       const struct rx_sw_desc *sdesc)
{ … }

/**
 * free_rx_bufs - free RX buffers on an SGE Free List
 * @adapter: the adapter
 * @fl: the SGE Free List to free buffers from
 * @n: how many buffers to free
 *
 * Release the next @n buffers on an SGE Free List RX queue.  The
 * buffers must be made inaccessible to hardware before calling this
 * function.
 */
static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n)
{ … }
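/*
 * Editor's illustrative sketch -- not the driver's code.  The reclaim logic
 * behind reclaimable()/reclaim_completed_tx() above comes down to modular
 * ring arithmetic between the Consumer Index the hardware has reported back
 * and the Consumer Index software has already processed.  The stand-alone
 * helper below restates that arithmetic on plain values.
 */
static inline unsigned int example_ring_reclaimable(unsigned int hw_cidx,
						    unsigned int sw_cidx,
						    unsigned int ring_size)
{
	/*
	 * The hardware index may have wrapped past the end of the ring since
	 * software last caught up, so reduce the difference modulo the ring
	 * size (both indices are assumed to be less than ring_size).
	 */
	return (hw_cidx + ring_size - sw_cidx) % ring_size;
}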
/**
 * unmap_rx_buf - unmap the current RX buffer on an SGE Free List
 * @adapter: the adapter
 * @fl: the SGE Free List
 *
 * Unmap the current buffer on an SGE Free List RX queue.  The
 * buffer must be made inaccessible to HW before calling this function.
 *
 * This is similar to @free_rx_bufs above but does not free the buffer.
 * Do note that the FL still loses any further access to the buffer.
 * This is used predominantly to "transfer ownership" of an FL buffer
 * to another entity (typically an skb's fragment list).
 */
static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl)
{ … }

/**
 * ring_fl_db - ring doorbell on free list
 * @adapter: the adapter
 * @fl: the Free List whose doorbell should be rung ...
 *
 * Tell the Scatter Gather Engine that there are new free list entries
 * available.
 */
static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
{ … }

/**
 * set_rx_sw_desc - initialize software RX buffer descriptor
 * @sdesc: pointer to the software RX buffer descriptor
 * @page: pointer to the page data structure backing the RX buffer
 * @dma_addr: PCI DMA address (possibly with low-bit flags)
 */
static inline void set_rx_sw_desc(struct rx_sw_desc *sdesc, struct page *page,
				  dma_addr_t dma_addr)
{ … }

/*
 * Support for poisoning RX buffers ...
 */
#define POISON_BUF_VAL …

static inline void poison_buf(struct page *page, size_t sz)
{ … }

/**
 * refill_fl - refill an SGE RX buffer ring
 * @adapter: the adapter
 * @fl: the Free List ring to refill
 * @n: the number of new buffers to allocate
 * @gfp: the gfp flags for the allocations
 *
 * (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
 * allocated with the supplied gfp flags.  The caller must ensure that
 * @n does not exceed the queue's capacity -- i.e. (cidx == pidx) _IN
 * EGRESS QUEUE UNITS_ indicates an empty Free List!  Returns the number
 * of buffers allocated.  If afterwards the queue is found critically low,
 * mark it as starving in the bitmap of starving FLs.
 */
static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
			      int n, gfp_t gfp)
{ … }

/*
 * Refill a Free List to its capacity or the Maximum Refill Increment,
 * whichever is smaller ...
 */
static inline void __refill_fl(struct adapter *adapter, struct sge_fl *fl)
{ … }

/**
 * alloc_ring - allocate resources for an SGE descriptor ring
 * @dev: the PCI device's core device
 * @nelem: the number of descriptors
 * @hwsize: the size of each hardware descriptor
 * @swsize: the size of each software descriptor
 * @busaddrp: the physical PCI bus address of the allocated ring
 * @swringp: return address pointer for software ring
 * @stat_size: extra space in hardware ring for status information
 *
 * Allocates resources for an SGE descriptor ring, such as TX queues,
 * free buffer lists, response queues, etc.  Each SGE ring requires
 * space for its hardware descriptors plus, optionally, space for software
 * state associated with each hardware entry (the metadata).  The function
 * returns three values: the virtual address for the hardware ring (the
 * return value of the function), the PCI bus address of the hardware
 * ring (in *busaddrp), and the address of the software ring (in swringp).
 * Both the hardware and software rings are returned zeroed out.
 */
static void *alloc_ring(struct device *dev, size_t nelem, size_t hwsize,
			size_t swsize, dma_addr_t *busaddrp, void *swringp,
			size_t stat_size)
{ … }
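/*
 * Editor's illustrative sketch -- not the driver's allocator.  A descriptor
 * ring of the kind alloc_ring() above describes pairs a DMA-coherent
 * hardware ring with an ordinary kernel allocation for the per-descriptor
 * software state.  The helper below shows that pairing in its simplest
 * form; the name and argument mix are hypothetical, it omits the
 * status-page sizing, and it assumes <linux/slab.h> is available (the
 * driver pulls it in indirectly).
 */
static __maybe_unused void *example_alloc_desc_ring(struct device *dev,
						    size_t nelem, size_t hwsize,
						    size_t swsize,
						    dma_addr_t *busaddrp,
						    void **swringp)
{
	size_t hwlen = nelem * hwsize;
	void *hwring = dma_alloc_coherent(dev, hwlen, busaddrp, GFP_KERNEL);

	if (!hwring)
		return NULL;

	if (swsize) {
		void *swring = kcalloc(nelem, swsize, GFP_KERNEL);

		if (!swring) {
			dma_free_coherent(dev, hwlen, hwring, *busaddrp);
			return NULL;
		}
		*swringp = swring;
	}

	/* dma_alloc_coherent() returns zeroed memory, as does kcalloc(). */
	return hwring;
}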
/**
 * sgl_len - calculates the size of an SGL of the given capacity
 * @n: the number of SGL entries
 *
 * Calculates the number of flits (8-byte units) needed for a Direct
 * Scatter/Gather List that can hold the given number of entries.
 */
static inline unsigned int sgl_len(unsigned int n)
{ … }

/**
 * flits_to_desc - returns the number of TX descriptors for the given flits
 * @flits: the number of flits
 *
 * Returns the number of TX descriptors needed for the supplied number
 * of flits.
 */
static inline unsigned int flits_to_desc(unsigned int flits)
{ … }

/**
 * is_eth_imm - can an Ethernet packet be sent as immediate data?
 * @skb: the packet
 *
 * Returns whether an Ethernet packet is small enough to fit completely as
 * immediate data.
 */
static inline int is_eth_imm(const struct sk_buff *skb)
{ … }

/**
 * calc_tx_flits - calculate the number of flits for a packet TX WR
 * @skb: the packet
 *
 * Returns the number of flits needed for a TX Work Request for the
 * given Ethernet packet, including the needed WR and CPL headers.
 */
static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
{ … }

/**
 * write_sgl - populate a Scatter/Gather List for a packet
 * @skb: the packet
 * @tq: the TX queue we are writing into
 * @sgl: starting location for writing the SGL
 * @end: points right after the end of the SGL
 * @start: start offset into skb main-body data to include in the SGL
 * @addr: the list of DMA bus addresses for the SGL elements
 *
 * Generates a Scatter/Gather List for the buffers that make up a packet.
 * The caller must provide adequate space for the SGL that will be written.
 * The SGL includes all of the packet's page fragments and the data in its
 * main body except for the first @start bytes.  @sgl must be 16-byte
 * aligned and within a TX descriptor with available space.  @end points
 * right after the end of the SGL but does not account for any potential
 * wrap around, i.e., @end > @tq->stat.
 */
static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq,
		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
		      const dma_addr_t *addr)
{ … }

/**
 * ring_tx_db - check and potentially ring a TX queue's doorbell
 * @adapter: the adapter
 * @tq: the TX queue
 * @n: number of new descriptors to give to HW
 *
 * Ring the doorbell for a TX queue.
 */
static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
			      int n)
{ … }

/**
 * inline_tx_skb - inline a packet's data into TX descriptors
 * @skb: the packet
 * @tq: the TX queue where the packet will be inlined
 * @pos: starting position in the TX queue to inline the packet
 *
 * Inline a packet's contents directly into TX descriptors, starting at
 * the given position within the TX DMA ring.  Most of the complexity of
 * this operation is dealing with wrap arounds in the middle of the packet
 * we want to inline.
 */
static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *tq,
			  void *pos)
{ … }

/*
 * Figure out what HW csum a packet wants and return the appropriate control
 * bits.
 */
static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
{ … }

/*
 * Stop an Ethernet TX queue and record that state change.
 */
static void txq_stop(struct sge_eth_txq *txq)
{ … }

/*
 * Advance our software state for a TX queue by marking @n more descriptors
 * as in use.
 */
static inline void txq_advance(struct sge_txq *tq, unsigned int n)
{ … }
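/*
 * Editor's illustrative sketch -- not the driver's functions.  The flit
 * arithmetic behind sgl_len() and flits_to_desc() above follows from the
 * ULP_TX SGL layout (a two-flit header carrying the command word plus the
 * first length/address pair, then three flits for every further pair of
 * entries) and from a 64-byte, i.e. eight-flit, egress descriptor unit.
 * The helpers below restate that arithmetic; treat the exact layout
 * assumptions as the editor's, not the source's.
 */
static inline unsigned int example_sgl_flits(unsigned int nsge)
{
	/* Header plus first entry take 2 flits; assumes nsge >= 1. */
	nsge--;
	return (3 * nsge) / 2 + (nsge & 1) + 2;
}

static inline unsigned int example_flits_to_descs(unsigned int flits)
{
	/* One egress descriptor unit holds 8 eight-byte flits. */
	return DIV_ROUND_UP(flits, 8);
}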
/**
 * t4vf_eth_xmit - add a packet to an Ethernet TX queue
 * @skb: the packet
 * @dev: the egress net device
 *
 * Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
 */
netdev_tx_t t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{ … }

/**
 * copy_frags - copy fragments from gather list into skb_shared_info
 * @skb: destination skb
 * @gl: source internal packet gather list
 * @offset: packet start offset in first page
 *
 * Copy an internal packet gather list into a Linux skb_shared_info
 * structure.
 */
static inline void copy_frags(struct sk_buff *skb, const struct pkt_gl *gl,
			      unsigned int offset)
{ … }

/**
 * t4vf_pktgl_to_skb - build an sk_buff from a packet gather list
 * @gl: the gather list
 * @skb_len: size of sk_buff main body if it carries fragments
 * @pull_len: amount of data to move to the sk_buff's main body
 *
 * Builds an sk_buff from the given packet gather list.  Returns the
 * sk_buff or %NULL if sk_buff allocation failed.
 */
static struct sk_buff *t4vf_pktgl_to_skb(const struct pkt_gl *gl,
					 unsigned int skb_len,
					 unsigned int pull_len)
{ … }

/**
 * t4vf_pktgl_free - free a packet gather list
 * @gl: the gather list
 *
 * Releases the pages of a packet gather list.  We do not own the last
 * page on the list and do not free it.
 */
static void t4vf_pktgl_free(const struct pkt_gl *gl)
{ … }

/**
 * do_gro - perform Generic Receive Offload ingress packet processing
 * @rxq: ingress RX Ethernet Queue
 * @gl: gather list for ingress packet
 * @pkt: CPL header for last packet fragment
 *
 * Perform Generic Receive Offload (GRO) ingress packet processing.
 * We use the standard Linux GRO interfaces for this.
 */
static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
		   const struct cpl_rx_pkt *pkt)
{ … }

/**
 * t4vf_ethrx_handler - process an ingress ethernet packet
 * @rspq: the response queue that received the packet
 * @rsp: the response queue descriptor holding the RX_PKT message
 * @gl: the gather list of packet fragments
 *
 * Process an ingress ethernet packet and deliver it to the stack.
 */
int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
		       const struct pkt_gl *gl)
{ … }

/**
 * is_new_response - check if a response is newly written
 * @rc: the response control descriptor
 * @rspq: the response queue
 *
 * Returns true if a response descriptor contains a yet unprocessed
 * response.
 */
static inline bool is_new_response(const struct rsp_ctrl *rc,
				   const struct sge_rspq *rspq)
{ … }

/**
 * restore_rx_bufs - put back a packet's RX buffers
 * @gl: the packet gather list
 * @fl: the SGE Free List
 * @frags: how many fragments in @gl
 *
 * Called when we find out that the current packet, @gl, can't be
 * processed right away for some reason.  This is a very rare event and
 * there's no effort to make this suspension/resumption process
 * particularly efficient.
 *
 * We implement the suspension by putting all of the RX buffers associated
 * with the current packet back on the original Free List.  The buffers
 * have already been unmapped and are left unmapped; we mark them as
 * unmapped in order to prevent further unmapping attempts.  (Effectively
 * this function undoes the series of @unmap_rx_buf calls which were done
 * to create the current packet's gather list.)  This leaves us ready to
 * restart processing of the packet the next time we start processing the
 * RX Queue ...
 */
static void restore_rx_bufs(const struct pkt_gl *gl, struct sge_fl *fl,
			    int frags)
{ … }
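/*
 * Editor's illustrative sketch -- not is_new_response() above or rspq_next()
 * below.  Response queues track "newness" with a generation bit: the queue's
 * expected generation flips each time its Consumer Index wraps, so a
 * descriptor is unprocessed exactly when its generation bit matches the
 * queue's.  The helpers below show the idea on plain values; the field and
 * bit positions in the real descriptors are not spelled out here.
 */
static inline bool example_response_is_new(unsigned int desc_gen,
					   unsigned int queue_gen)
{
	return (desc_gen & 1) == (queue_gen & 1);
}

static inline void example_rspq_advance(unsigned int *cidx, unsigned int *gen,
					unsigned int ring_size)
{
	/* Step to the next entry, flipping the generation bit on wrap. */
	if (++*cidx == ring_size) {
		*cidx = 0;
		*gen ^= 1;
	}
}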
/**
 * rspq_next - advance to the next entry in a response queue
 * @rspq: the queue
 *
 * Updates the state of a response queue to advance it to the next entry.
 */
static inline void rspq_next(struct sge_rspq *rspq)
{ … }

/**
 * process_responses - process responses from an SGE response queue
 * @rspq: the ingress response queue to process
 * @budget: how many responses can be processed in this round
 *
 * Process responses from a Scatter Gather Engine response queue up to
 * the supplied budget.  Responses include received packets as well as
 * control messages from firmware or hardware.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue.  If the system is under memory shortage use a fairly
 * long delay to help recovery.
 */
static int process_responses(struct sge_rspq *rspq, int budget)
{ … }

/**
 * napi_rx_handler - the NAPI handler for RX processing
 * @napi: the napi instance
 * @budget: how many packets we can process in this round
 *
 * Handler for new data events when using NAPI.  This does not need any
 * locking or protection from interrupts as data interrupts are off at
 * this point and other adapter interrupts do not interfere (the latter
 * is not a concern at all with MSI-X as non-data interrupts then have
 * a separate handler).
 */
static int napi_rx_handler(struct napi_struct *napi, int budget)
{ … }

/*
 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
 * (i.e., response queue serviced by NAPI polling).
 */
irqreturn_t t4vf_sge_intr_msix(int irq, void *cookie)
{ … }

/*
 * Process the indirect interrupt entries in the interrupt queue and kick off
 * NAPI for each queue that has generated an entry.
 */
static unsigned int process_intrq(struct adapter *adapter)
{ … }

/*
 * The MSI interrupt handler handles data events from SGE response queues as
 * well as error and other async events as they all use the same MSI vector.
 */
static irqreturn_t t4vf_intr_msi(int irq, void *cookie)
{ … }

/**
 * t4vf_intr_handler - select the top-level interrupt handler
 * @adapter: the adapter
 *
 * Selects the top-level interrupt handler based on the type of interrupts
 * (MSI-X or MSI).
 */
irq_handler_t t4vf_intr_handler(struct adapter *adapter)
{ … }

/**
 * sge_rx_timer_cb - perform periodic maintenance of SGE RX queues
 * @t: RX timer
 *
 * Runs periodically from a timer to perform maintenance of SGE RX queues.
 *
 * Replenishes RX queues that have run out due to memory shortage.
 * Normally new RX buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty.  We schedule NAPI to do
 * the actual refill.
 */
static void sge_rx_timer_cb(struct timer_list *t)
{ … }

/**
 * sge_tx_timer_cb - perform periodic maintenance of SGE TX queues
 * @t: TX timer
 *
 * Runs periodically from a timer to perform maintenance of SGE TX queues.
 *
 * Reclaims completed TX packets for the Ethernet queues.  Normally
 * packets are cleaned up by new TX packets; this timer cleans up packets
 * when no new packets are being submitted.  This is essential for pktgen,
 * at least.
 */
static void sge_tx_timer_cb(struct timer_list *t)
{ … }
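/*
 * Editor's illustrative sketch -- not t4vf_sge_intr_msix().  In this NAPI
 * model an MSI-X data interrupt handler typically does nothing beyond
 * scheduling the polling context of the Response Queue that owns the
 * vector; the real work happens in napi_rx_handler() above.  This
 * hypothetical handler assumes the Response Queue embeds its NAPI instance
 * in a "napi" member.
 */
static __maybe_unused irqreturn_t example_rspq_msix_intr(int irq, void *cookie)
{
	struct sge_rspq *rspq = cookie;

	napi_schedule(&rspq->napi);
	return IRQ_HANDLED;
}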
/**
 * bar2_address - return the BAR2 address for an SGE Queue's Registers
 * @adapter: the adapter
 * @qid: the SGE Queue ID
 * @qtype: the SGE Queue Type (Egress or Ingress)
 * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues
 *
 * Returns the BAR2 address for the SGE Queue Registers associated with
 * @qid.  If BAR2 SGE Registers aren't available, returns NULL.  Also
 * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE
 * Queue Registers.  If the BAR2 Queue ID is 0, then "Inferred Queue ID"
 * Registers are supported (e.g. the Write Combining Doorbell Buffer).
 */
static void __iomem *bar2_address(struct adapter *adapter, unsigned int qid,
				  enum t4_bar2_qtype qtype,
				  unsigned int *pbar2_qid)
{ … }

/**
 * t4vf_sge_alloc_rxq - allocate an SGE RX Queue
 * @adapter: the adapter
 * @rspq: pointer to the new rxq's Response Queue to be filled in
 * @iqasynch: if 0, a normal rspq; if 1, an asynchronous event queue
 * @dev: the network device associated with the new rspq
 * @intr_dest: MSI-X vector index (overridden in MSI mode)
 * @fl: pointer to the new rxq's Free List to be filled in
 * @hnd: the interrupt handler to invoke for the rspq
 */
int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
		       bool iqasynch, struct net_device *dev, int intr_dest,
		       struct sge_fl *fl, rspq_handler_t hnd)
{ … }

/**
 * t4vf_sge_alloc_eth_txq - allocate an SGE Ethernet TX Queue
 * @adapter: the adapter
 * @txq: pointer to the new txq to be filled in
 * @dev: the network device
 * @devq: the network TX queue associated with the new txq
 * @iqid: the relative ingress queue ID to which events relating to
 *	the new txq should be directed
 */
int t4vf_sge_alloc_eth_txq(struct adapter *adapter, struct sge_eth_txq *txq,
			   struct net_device *dev, struct netdev_queue *devq,
			   unsigned int iqid)
{ … }

/*
 * Free the DMA map resources associated with a TX queue.
 */
static void free_txq(struct adapter *adapter, struct sge_txq *tq)
{ … }

/*
 * Free the resources associated with a response queue (possibly including a
 * free list).
 */
static void free_rspq_fl(struct adapter *adapter, struct sge_rspq *rspq,
			 struct sge_fl *fl)
{ … }

/**
 * t4vf_free_sge_resources - free SGE resources
 * @adapter: the adapter
 *
 * Frees resources used by the SGE queue sets.
 */
void t4vf_free_sge_resources(struct adapter *adapter)
{ … }

/**
 * t4vf_sge_start - enable SGE operation
 * @adapter: the adapter
 *
 * Start tasklets and timers associated with the DMA engine.
 */
void t4vf_sge_start(struct adapter *adapter)
{ … }

/**
 * t4vf_sge_stop - disable SGE operation
 * @adapter: the adapter
 *
 * Stop tasklets and timers associated with the DMA engine.  Note that
 * this is effective only if measures have been taken to disable any HW
 * events that may restart them.
 */
void t4vf_sge_stop(struct adapter *adapter)
{ … }

/**
 * t4vf_sge_init - initialize SGE
 * @adapter: the adapter
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here; instead the driver
 * top-level must request those individually.  We also do not enable DMA
 * here; that should be done after the queues have been set up.
 */
int t4vf_sge_init(struct adapter *adapter)
{ … }
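/*
 * Editor's illustrative sketch -- not free_txq()/free_rspq_fl().  Tearing
 * down a descriptor ring allocated in the style sketched near alloc_ring()
 * is the mirror image: release the software state array and hand the
 * DMA-coherent hardware ring back.  The name is hypothetical and, as with
 * the allocation sketch, <linux/slab.h> is assumed to be available.
 */
static __maybe_unused void example_free_desc_ring(struct device *dev,
						  size_t nelem, size_t hwsize,
						  void *hwring,
						  dma_addr_t busaddr,
						  void *swring)
{
	kfree(swring);				/* kfree(NULL) is a no-op */
	dma_free_coherent(dev, nelem * hwsize, hwring, busaddr);
}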