// SPDX-License-Identifier: GPL-2.0-only /* * IBM Accelerator Family 'GenWQE' * * (C) Copyright IBM Corp. 2013 * * Author: Frank Haverkamp <[email protected]> * Author: Joerg-Stephan Vogt <[email protected]> * Author: Michael Jung <[email protected]> * Author: Michael Ruettger <[email protected]> */ /* * Miscellaneous functionality used in the other GenWQE driver parts. */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/vmalloc.h> #include <linux/page-flags.h> #include <linux/scatterlist.h> #include <linux/hugetlb.h> #include <linux/iommu.h> #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/ctype.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/delay.h> #include <linux/pgtable.h> #include "genwqe_driver.h" #include "card_base.h" #include "card_ddcb.h" /** * __genwqe_writeq() - Write 64-bit register * @cd: genwqe device descriptor * @byte_offs: byte offset within BAR * @val: 64-bit value * * Return: 0 if success; < 0 if error */ int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) { … } /** * __genwqe_readq() - Read 64-bit register * @cd: genwqe device descriptor * @byte_offs: byte offset within BAR * * Return: value from register */ u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) { … } /** * __genwqe_writel() - Write 32-bit register * @cd: genwqe device descriptor * @byte_offs: byte offset within BAR * @val: 32-bit value * * Return: 0 if success; < 0 if error */ int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) { … } /** * __genwqe_readl() - Read 32-bit register * @cd: genwqe device descriptor * @byte_offs: byte offset within BAR * * Return: value from register */ u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) { … } /** * genwqe_read_app_id() - Extract app_id * @cd: genwqe device descriptor * @app_name: buffer used to pass back the name * @len: length of data for name * * app_unitcfg needs to be filled with valid data first. */ int genwqe_read_app_id(struct
genwqe_dev *cd, char *app_name, int len) { … } #define CRC32_POLYNOMIAL … static u32 crc32_tab[256]; /* crc32 lookup table */ /** * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations * * Existing kernel functions seem to use a different polynomial, * therefore we could not use them here. * * Genwqe's Polynomial = 0x20044009 */ void genwqe_init_crc32(void) { … } /** * genwqe_crc32() - Generate 32-bit crc as required for DDCBs * @buff: pointer to data buffer * @len: length of data for calculation * @init: initial crc (0xffffffff at start) * * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) * * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should * result in a crc32 of 0xf33cb7d3. * * The existing kernel crc functions did not cover this polynomial yet. * * Return: crc32 checksum. */ u32 genwqe_crc32(u8 *buff, size_t len, u32 init) { … } /* NOTE(review): undocumented; judging by name and signature this presumably allocates a DMA-coherent buffer of @size bytes and reports its bus address through @dma_handle - confirm against the body. */ void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, dma_addr_t *dma_handle) { … } /* NOTE(review): undocumented; presumably releases a buffer obtained from __genwqe_alloc_consistent() - confirm. */ void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, void *vaddr, dma_addr_t dma_handle) { … } /* NOTE(review): undocumented; presumably undoes the DMA mappings recorded in @dma_list for @num_pages pages - confirm. */ static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, int num_pages) { … } /* NOTE(review): undocumented; presumably DMA-maps each of the @num_pages pages in @page_list and stores the resulting addresses in @dma_list - confirm, including the error-path behavior. */ static int genwqe_map_pages(struct genwqe_dev *cd, struct page **page_list, int num_pages, dma_addr_t *dma_list) { … } /* NOTE(review): undocumented; presumably returns the size needed for an sgl describing @num_pages pages - confirm the unit. */ static int genwqe_sgl_size(int num_pages) { … } /* * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages * * Allocates memory for sgl and overlapping pages. Pages which might * overlap other user-space memory blocks are being cached for DMAs, * such that we do not run into synchronization issues. Data is copied * from user-space into the cached pages.
*/ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, void __user *user_addr, size_t user_size, int write) { … } /* NOTE(review): undocumented; presumably fills the scatter gather list in @sgl from the DMA addresses in @dma_list - confirm. */ int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, dma_addr_t *dma_list) { … } /** * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages * @cd: genwqe device descriptor * @sgl: scatter gather list describing user-space memory * * After the DMA transfer has been completed we free the memory for * the sgl and the cached pages. Data is being transferred from cached * pages into user-space buffers. */ int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) { … } /** * genwqe_user_vmap() - Map user-space memory to virtual kernel memory * @cd: pointer to genwqe device * @m: mapping params * @uaddr: user virtual address * @size: size of memory to be mapped * * We need to think about how we could speed this up. Of course it is * not a good idea to do this over and over again, like we are * currently doing it. Nevertheless, I am curious where on the path * the performance is spent. Most probably within the memory * allocation functions, but maybe also in the DMA mapping code. * * Restrictions: The maximum size of the possible mapping currently depends * on the amount of memory we can get using kzalloc() for the * page_list and pci_alloc_consistent for the sg_list. * The sg_list is currently itself not scattered, which could * be fixed with some effort. The page_list must be split into * PAGE_SIZE chunks too. All that will make the complicated * code more complicated.
* * Return: 0 if success */ int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, unsigned long size) { … } /** * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel * memory * @cd: pointer to genwqe device * @m: mapping params */ int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m) { … } /** * genwqe_card_type() - Get chip type from SLU Configuration Register * @cd: pointer to the genwqe device descriptor * Return: 0: Altera Stratix-IV 230 * 1: Altera Stratix-IV 530 * 2: Altera Stratix-V A4 * 3: Altera Stratix-V A7 */ u8 genwqe_card_type(struct genwqe_dev *cd) { … } /** * genwqe_card_reset() - Reset the card * @cd: pointer to the genwqe device descriptor */ int genwqe_card_reset(struct genwqe_dev *cd) { … } /* NOTE(review): undocumented; what is read and what the return value encodes cannot be told from the signature - confirm against the body. */ int genwqe_read_softreset(struct genwqe_dev *cd) { … } /** * genwqe_set_interrupt_capability() - Configure MSI capability structure * @cd: pointer to the device * @count: number of vectors to allocate * Return: 0 if no error */ int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) { … } /** * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() * @cd: pointer to the device */ void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) { … } /** * set_reg_idx() - Fill array with data. Ignore illegal offsets.
* @cd: card device * @r: debug register array * @i: index to desired entry * @m: maximum possible entries * @addr: addr which is read * @idx: index in debug array * @val: read value */ static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, unsigned int *i, unsigned int m, u32 addr, u32 idx, u64 val) { … } /* NOTE(review): undocumented; takes the same arguments as set_reg_idx() minus @idx, so presumably a convenience wrapper with a fixed index - confirm. */ static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, unsigned int *i, unsigned int m, u32 addr, u64 val) { … } /* NOTE(review): undocumented; presumably fills @regs with at most @max_regs entries of first-failure data capture registers; the meaning of @all and of the return value needs confirming. */ int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, unsigned int max_regs, int all) { … } /** * genwqe_ffdc_buff_size() - Calculates the number of dump registers * @cd: genwqe device descriptor * @uid: unit ID */ int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) { … } /** * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure * @cd: genwqe device descriptor * @uid: unit ID * @regs: register information * @max_regs: number of register entries */ int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, struct genwqe_reg *regs, unsigned int max_regs) { … } /** * genwqe_write_vreg() - Write register in virtual window * @cd: genwqe device descriptor * @reg: register (byte) offset within BAR * @val: value to write * @func: PCI virtual function * * Note, these registers are only accessible to the PF through the * VF-window. It is not intended for the VF to access. */ int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) { … } /** * genwqe_read_vreg() - Read register in virtual window * @cd: genwqe device descriptor * @reg: register (byte) offset within BAR * @func: PCI virtual function * * Note, these registers are only accessible to the PF through the * VF-window. It is not intended for the VF to access.
*/ u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) { … } /** * genwqe_base_clock_frequency() - Determine base clock frequency of the card * @cd: genwqe device descriptor * * Note: From a design perspective it turned out to be a bad idea to * use codes here to specify the frequency/speed values. An old * driver cannot understand new codes and is therefore always a * problem. Better is to measure out the value or put the * speed/frequency directly into a register which is always a valid * value for old as well as for new software. * * Return: Card clock in MHz */ int genwqe_base_clock_frequency(struct genwqe_dev *cd) { … } /** * genwqe_stop_traps() - Stop traps * @cd: genwqe device descriptor * * Before reading out the analysis data, we need to stop the traps. */ void genwqe_stop_traps(struct genwqe_dev *cd) { … } /** * genwqe_start_traps() - Start traps * @cd: genwqe device descriptor * * After having read the data, we can/must enable the traps again. */ void genwqe_start_traps(struct genwqe_dev *cd) { … }