// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright(c) 2015 - 2020 Intel Corporation. * Copyright(c) 2021 Cornelis Networks. */ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/delay.h> #include <linux/xarray.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/hrtimer.h> #include <linux/bitmap.h> #include <linux/numa.h> #include <rdma/rdma_vt.h> #include "hfi.h" #include "device.h" #include "common.h" #include "trace.h" #include "mad.h" #include "sdma.h" #include "debugfs.h" #include "verbs.h" #include "aspm.h" #include "affinity.h" #include "vnic.h" #include "exp_rcv.h" #include "netdev.h" #undef pr_fmt #define pr_fmt(fmt) … /* * Minimum number of buffers we want to have per context, after driver * (NOTE(review): this sentence appears truncated; confirm the intended wording) */ #define HFI1_MIN_USER_CTXT_BUFCNT … #define HFI1_MIN_EAGER_BUFFER_SIZE … #define HFI1_MAX_EAGER_BUFFER_SIZE … #define NUM_IB_PORTS … /* * Number of user receive contexts we are configured to use (to allow for more * pio buffers per ctxt, etc.) Zero means use one user context per CPU. 
*/ int num_user_contexts = …; module_param_named(num_user_contexts, num_user_contexts, int, 0444); MODULE_PARM_DESC(…) …; uint krcvqs[RXE_NUM_DATA_VL]; int krcvqsset; module_param_array(…); MODULE_PARM_DESC(…) …; /* computed based on the krcvqs array above */ unsigned long n_krcvqs; static unsigned hfi1_rcvarr_split = …; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); MODULE_PARM_DESC(…) …; static uint eager_buffer_size = …; /* 8MB */ module_param(eager_buffer_size, uint, S_IRUGO); MODULE_PARM_DESC(…) …; static uint rcvhdrcnt = …; /* 2x the max eager buffer count */ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); MODULE_PARM_DESC(…) …; static uint hfi1_hdrq_entsize = …; module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444); MODULE_PARM_DESC(…) …; unsigned int user_credit_return_threshold = …; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(…) …; DEFINE_XARRAY_FLAGS(…); static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) { … } /* * Create the receive context array and one or more kernel contexts. */ int hfi1_create_kctxts(struct hfi1_devdata *dd) { … } /* * Helper routines for the receive context reference count (rcd and uctxt). */ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) { … } /** * hfi1_rcd_free - clean up when the reference count is zero * @kref: pointer to a struct kref (NOTE(review): presumably the one embedded * in an initialized rcd data structure; confirm) * */ static void hfi1_rcd_free(struct kref *kref) { … } /** * hfi1_rcd_put - decrement reference for rcd * @rcd: pointer to an initialized rcd data structure * * Use this to put a reference after the init. */ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) { … } /** * hfi1_rcd_get - increment reference for rcd * @rcd: pointer to an initialized rcd data structure * * Use this to get a reference after the init. * * Return : reflect kref_get_unless_zero(), which returns non-zero on * increment, otherwise 0. 
*/ int hfi1_rcd_get(struct hfi1_ctxtdata *rcd) { … } /** * allocate_rcd_index - allocate an rcd index from the rcd array * @dd: pointer to a valid devdata structure * @rcd: rcd data structure to assign * @index: pointer to the index that is allocated * * Find an empty index in the rcd array, and assign the given rcd to it. * If the array is full, we are EBUSY. * */ static int allocate_rcd_index(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, u16 *index) { … } /** * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the * array * @dd: pointer to a valid devdata structure * @ctxt: the index of a possible rcd * * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given * ctxt index is valid. * * The caller is responsible for making the _put(). * */ struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, u16 ctxt) { … } /** * hfi1_rcd_get_by_index - get by index * @dd: pointer to a valid devdata structure * @ctxt: the index of a possible rcd * * We need to protect access to the rcd array. If access is needed to * one or more indices, get the protecting spinlock and then increment the * kref. * * The caller is responsible for making the _put(). * */ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt) { … } /* * Common code for user and kernel context create and setup. * NOTE: the initial kref is done here (hfi1_rcd_init()). */ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, struct hfi1_ctxtdata **context) { … } /** * hfi1_free_ctxt - free context * @rcd: pointer to an initialized rcd data structure * * This wrapper is the free function that matches hfi1_create_ctxtdata(). * When a context is done being used (kernel or user), this function is called * for the "final" put to match the kref init from hfi1_create_ctxtdata(). * Other users of the context do a get/put sequence to make sure that the * structure isn't removed while in use. 
*/ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) { … } /* * Select the largest ccti value over all SLs to determine the intra- * packet gap for the link. * * Called with cca_timer_lock held (to protect access to the cca_timer * array), and rcu_read_lock() (to protect access to cc_state). */ void set_link_ipg(struct hfi1_pportdata *ppd) { … } static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) { … } /* * Common code for initializing the physical port structure. */ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u32 port) { … } /* * Do initialization for the device that is only needed on * first detect, not on resets. */ static int loadtime_init(struct hfi1_devdata *dd) { … } /** * init_after_reset - re-initialize after a reset * @dd: the hfi1_ib device * * Sanity check at least some of the values after reset, and * ensure no receive or transmit (explicitly, in case reset * failed). */ static int init_after_reset(struct hfi1_devdata *dd) { … } static void enable_chip(struct hfi1_devdata *dd) { … } /** * create_workqueues - create per-port workqueues * @dd: the hfi1_ib device */ static int create_workqueues(struct hfi1_devdata *dd) { … } /** * destroy_workqueues - destroy per-port workqueues * @dd: the hfi1_ib device */ static void destroy_workqueues(struct hfi1_devdata *dd) { … } /** * enable_general_intr() - Enable the IRQs that will be handled by the * general interrupt handler. * @dd: valid devdata * */ static void enable_general_intr(struct hfi1_devdata *dd) { … } /** * hfi1_init - do the actual initialization sequence on the chip * @dd: the hfi1_ib device * @reinit: re-initializing, so don't allocate new memory * * Do the actual initialization sequence on the chip. This is done * both from the init routine called from the PCI infrastructure, and * when we reset the chip, or detect that it was reset internally, * or it's administratively re-enabled. 
* * Memory allocation here and in called routines is only done in * the first case (reinit == 0). We have to be careful, because even * without memory allocation, we need to re-write all the chip registers * TIDs, etc. after the reset or enable has completed. */ int hfi1_init(struct hfi1_devdata *dd, int reinit) { … } struct hfi1_devdata *hfi1_lookup(int unit) { … } /* * Stop the timers during unit shutdown, or after an error late * in initialization. */ static void stop_timers(struct hfi1_devdata *dd) { … } /** * shutdown_device - shut down a device * @dd: the hfi1_ib device * * This is called to make the device quiet when we are about to * unload the driver, and also when the device is administratively * disabled. It does not free any data structures. * Everything it does has to be set up again by hfi1_init(dd, 1). */ static void shutdown_device(struct hfi1_devdata *dd) { … } /** * hfi1_free_ctxtdata - free a context's allocated data * @dd: the hfi1_ib device * @rcd: the ctxtdata structure * * Free up any allocated data for a context. * It should never change any chip state, or global driver state. */ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { … } /* * Release our hold on the shared asic data. If we are the last one, * return the structure to be finalized outside the lock. Must be * holding the hfi1_dev_table lock. */ static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { … } static void finalize_asic_data(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) { … } /** * hfi1_free_devdata - cleans up and frees per-unit data structure * @dd: pointer to a valid devdata structure * * It cleans up and frees all data structures set up * by hfi1_alloc_devdata(). */ void hfi1_free_devdata(struct hfi1_devdata *dd) { … } /** * hfi1_alloc_devdata - Allocate our primary per-unit data structure. 
* @pdev: Valid PCI device * @extra: How many bytes to alloc past the default * * Must be done via verbs allocator, because the verbs cleanup process * both does cleanup and free of the data structure. * "extra" is for chip-specific data. */ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { … } /* * Called from freeze mode handlers, and from PCI error * reporting code. Should be paranoid about the state of * the system and data structures. */ void hfi1_disable_after_error(struct hfi1_devdata *dd) { … } static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); static void shutdown_one(struct pci_dev *); #define DRIVER_LOAD_MSG … #define PFX … const struct pci_device_id hfi1_pci_tbl[] = …; MODULE_DEVICE_TABLE(pci, hfi1_pci_tbl); static struct pci_driver hfi1_pci_driver = …; static void __init compute_krcvqs(void) { … } /* * Do all the generic driver unit- and chip-independent memory * allocation and initialization. */ static int __init hfi1_mod_init(void) { … } module_init(…) …; /* * Do the non-unit driver cleanup, memory free, etc. at unload. */ static void __exit hfi1_mod_cleanup(void) { … } module_exit(hfi1_mod_cleanup); /* This can only be called after a successful initialization. */ static void cleanup_device_data(struct hfi1_devdata *dd) { … } /* * Clean up on unit shutdown, or error during unit load after * successful initialization. 
*/ static void postinit_cleanup(struct hfi1_devdata *dd) { … } static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { … } static void wait_for_clients(struct hfi1_devdata *dd) { … } static void remove_one(struct pci_dev *pdev) { … } static void shutdown_one(struct pci_dev *pdev) { … } /** * hfi1_create_rcvhdrq - create a receive header queue * @dd: the hfi1_ib device * @rcd: the context data * * This must be contiguous memory (from an I/O perspective), and must be * DMA'able (which means for some systems, it will go through an IOMMU, * or be forced into a low address range). */ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { … } /** * hfi1_setup_eagerbufs - allocate eager buffers, for both kernel and user * contexts. * @rcd: the context we are setting up. * * Allocate the eager TID buffers and program them into the chip. * They are no longer completely contiguous; we do multiple allocation * calls. Otherwise we get the OOM code involved, by asking for too * much per call, with disastrous results on some kernels. */ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) { … }