/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SN Platform GRU Driver
 *
 * GRU DRIVER TABLES, MACROS, externs, etc
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

#ifndef __GRUTABLES_H__
#define __GRUTABLES_H__

/*
 * GRU Chiplet:
 *   The GRU is a user addressable memory accelerator. It provides
 *   several forms of load, store, memset, bcopy instructions. In addition, it
 *   contains special instructions for AMOs, sending messages to message
 *   queues, etc.
 *
 *   The GRU is an integral part of the node controller. It connects
 *   directly to the cpu socket. In its current implementation, there are 2
 *   GRU chiplets in the node controller on each blade (~node).
 *
 *   The entire GRU memory space is fully coherent and cacheable by the cpus.
 *
 *   Each GRU chiplet has a physical memory map that looks like the following:
 *
 *     +-----------------+
 *     |/////////////////|
 *     |/////////////////|
 *     |/////////////////|
 *     |/////////////////|
 *     |/////////////////|
 *     |/////////////////|
 *     |/////////////////|
 *     |/////////////////|
 *     +-----------------+
 *     |  system control |
 *     +-----------------+        _______ +-------------+
 *     |/////////////////|       /        |             |
 *     |/////////////////|      /         |             |
 *     |/////////////////|     /          | instructions|
 *     |/////////////////|    /           |             |
 *     |/////////////////|   /            |             |
 *     |/////////////////|  /             |-------------|
 *     |/////////////////| /              |             |
 *     +-----------------+                |             |
 *     |   context 15    |                |    data     |
 *     +-----------------+                |             |
 *     |     ......      | \              |             |
 *     +-----------------+  \____________ +-------------+
 *     |    context 1    |
 *     +-----------------+
 *     |    context 0    |
 *     +-----------------+
 *
 *   Each of the "contexts" is a chunk of memory that can be mmaped into user
 *   space. The context consists of 2 parts:
 *
 *     - an instruction space that can be directly accessed by the user
 *       to issue GRU instructions and to check instruction status.
 *
 *     - a data area that acts as normal RAM.
 *
 *   User instructions contain virtual addresses of data to be accessed by the
 *   GRU. The GRU contains a TLB that is used to convert these user virtual
 *   addresses to physical addresses.
 *
 *   The "system control" area of the GRU chiplet is used by the kernel driver
 *   to manage user contexts and to perform functions such as TLB dropin and
 *   purging.
 *
 *   One context may be reserved for the kernel and used for cross-partition
 *   communication. The GRU will also be used to asynchronously zero out
 *   large blocks of memory (not currently implemented).
 *
 *
 * Tables:
 *
 *   VDATA - VMA Data         - Holds a few parameters. Head of linked list of
 *                              GTS tables for threads using the GSEG
 *   GTS - Gru Thread State   - contains info for managing a GSEG context. A
 *                              GTS is allocated for each thread accessing a
 *                              GSEG.
 *   GTD - GRU Thread Data    - contains shadow copy of GRU data when GSEG is
 *                              not loaded into a GRU
 *   GMS - GRU Memory Struct  - Used to manage TLB shootdowns. Tracks GRUs
 *                              where a GSEG has been loaded. Similar to
 *                              an mm_struct but for the GRU.
 *   GS  - GRU State          - Used to manage the state of a GRU chiplet
 *   BS  - Blade State        - Used to manage state of all GRU chiplets
 *                              on a blade
 *
 *
 *  Normal task tables for a task using the GRU:
 *     - 2 threads in process
 *     - 2 GSEGs open in process
 *     - GSEG1 is being used by both threads
 *     - GSEG2 is used only by thread 2
 *
 *       task -->|
 *       task ---+---> mm ->------ (notifier) -------+-> gms
 *                     |                             |
 *                     |--> vma -> vdata ---> gts--->|    GSEG1 (thread1)
 *                     |               |             |
 *                     |               +---> gts--->|     GSEG1 (thread2)
 *                     |                             |
 *                     |--> vma -> vdata ---> gts--->|    GSEG2 (thread2)
 *                     .
 *                     .
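 *
 *  As an illustrative sketch only (not driver code), a fault path resolves
 *  the per-thread state by walking the chain above: vma -> vdata -> gts.
 *  The field names used here (vd_head, ts_next, ts_tsid) are assumptions
 *  for illustration, since the structure bodies are elided below:
 *
 *      struct gru_vma_data *vdata = vma->vm_private_data;
 *      struct gru_thread_state *gts;
 *
 *      list_for_each_entry(gts, &vdata->vd_head, ts_next)
 *              if (gts->ts_tsid == tsid)
 *                      return gts;     (found the thread's GSEG state)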
 *
 *  GSEGs are marked DONTCOPY on fork
 *
 *       At open
 *              file.private_data -> NULL
 *
 *       At mmap,
 *              vma -> vdata
 *
 *       After gseg reference
 *              vma -> vdata -> gts
 *
 *       After fork
 *          parent
 *              vma -> vdata -> gts
 *          child
 *              (vma is not copied)
 *
 */

#include <linux/refcount.h>
#include <linux/rmap.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/wait.h>
#include <linux/mmu_notifier.h>
#include <linux/mm_types.h>
#include "gru.h"
#include "grulib.h"
#include "gruhandles.h"

extern struct gru_stats_s gru_stats;
extern struct gru_blade_state *gru_base[];
extern unsigned long gru_start_paddr, gru_end_paddr;
extern void *gru_start_vaddr;
extern unsigned int gru_max_gids;

#define GRU_MAX_BLADES		…
#define GRU_MAX_GRUS		…

#define GRU_DRIVER_ID_STR	…
#define GRU_DRIVER_VERSION_STR	…

/*
 * GRU statistics.
 */
struct gru_stats_s { … };

enum mcs_op { … };

struct mcs_op_statistic { … };

extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];

#define OPT_DPRINT	…
#define OPT_STATS	…

#define IRQ_GRU		…

/* Delay in jiffies between attempts to assign a GRU context */
#define GRU_ASSIGN_DELAY	…

/*
 * If a process has its context stolen, min delay in jiffies before trying to
 * steal a context from another process.
 */
#define GRU_STEAL_DELAY		…

#define STAT(id)	…

#ifdef CONFIG_SGI_GRU_DEBUG
#define gru_dbg(dev, fmt, x...)	…
#else
#define gru_dbg	…
#endif

/*-----------------------------------------------------------------------------
 * ASID management
 */
#define MAX_ASID	…
#define MIN_ASID	…
#define ASID_INC	…

/* Generate a GRU asid value from a GRU base asid & a virtual address. */
#define VADDR_HI_BIT		…
#define GRUREGION(addr)		…
#define GRUASID(asid, addr)	…

/*-----------------------------------------------------------------------------
 * File & VMS Tables
 */

struct gru_state;

/*
 * This structure is pointed to from the mm_struct via the notifier pointer.
 * There is one of these per address space.
 */
struct gru_mm_tracker { … } __attribute__ ((packed));

struct gru_mm_struct { … };

/*
 * One of these structures is allocated when a GSEG is mmaped. The
 * structure is pointed to by the vma->vm_private_data field in the vma struct.
 */
struct gru_vma_data { … };

/*
 * One of these is allocated for each thread accessing a mmaped GRU. A linked
 * list of these structures is hung off the struct gru_vma_data in the
 * mm_struct.
 */
struct gru_thread_state { … };

/*
 * Threaded programs actually allocate an array of GSEGs when a context is
 * created. Each thread uses a separate GSEG. TSID is the index into the GSEG
 * array.
 */
#define TSID(a, v)	…
#define UGRUADDR(gts)	…

#define NULLCTX		…

/*-----------------------------------------------------------------------------
 * GRU State Tables
 */

/*
 * One of these exists for each GRU chiplet.
 */
struct gru_state { … };
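/*
 * Illustrative sketch only: the GMS described above hangs off the mm_struct
 * via its mmu notifier, so address-space invalidations reach every GRU in
 * which the GSEG is loaded. When the core mm invalidates [start, start+len),
 * the driver's notifier callback would resolve the GMS and flush, roughly:
 *
 *	struct gru_mm_struct *gms =
 *		container_of(mn, struct gru_mm_struct, ms_notifier);
 *
 *	gru_flush_tlb_range(gms, start, len);
 *
 * (The ms_notifier field name is an assumption here; the gru_mm_struct body
 * is elided above.)
 */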
/*
 * This structure contains the GRU state for all the GRUs on a blade.
 */
struct gru_blade_state { … };

/*-----------------------------------------------------------------------------
 * Address Primitives
 */
#define get_tfm_for_cpu(g, c)	…
#define get_tfh_by_index(g, i)	…
#define get_tgh_by_index(g, i)	…
#define get_cbe_by_index(g, i)	…

/*-----------------------------------------------------------------------------
 * Useful Macros
 */

/* Given a blade# & chiplet#, get a pointer to the GRU */
#define get_gru(b, c)	…

/* Number of bytes to save/restore when unloading/loading GRU contexts */
#define DSR_BYTES(dsr)	…
#define CBR_BYTES(cbr)	…

/* Convert a user CB number to the actual CBRNUM */
#define thread_cbr_number(gts, n)	…

/* Convert a gid to a pointer to the GRU */
#define GID_TO_GRU(gid)	…

/* Scan all active GRUs in a GRU bitmap */
#define for_each_gru_in_bitmap(gid, map)	…

/* Scan all active GRUs on a specific blade */
#define for_each_gru_on_blade(gru, nid, i)	…

/* Scan all GRUs */
#define foreach_gid(gid)	…

/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
#define for_each_gts_on_gru(gts, gru, ctxnum)	…

/* Scan each CBR whose bit is set in a TFM (or copy of) */
#define for_each_cbr_in_tfm(i, map)	…

/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
#define for_each_cbr_in_allocation_map(i, map, k)	…

#define gseg_physical_address(gru, ctxnum)	…
#define gseg_virtual_address(gru, ctxnum)	…

/*-----------------------------------------------------------------------------
 * Lock / Unlock GRU handles
 *	Use the "delresp" bit in the handle as a "lock" bit.
 */
/* Lock hierarchy checking enabled only in emulator */

/* 0 = lock failed, 1 = locked */
static inline int __trylock_handle(void *h) { … }

static inline void __lock_handle(void *h) { … }

static inline void __unlock_handle(void *h) { … }

static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) { … }

static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) { … }

static inline void unlock_cch_handle(struct gru_context_configuration_handle *cch) { … }

static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) { … }

static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) { … }

static inline int is_kernel_context(struct gru_thread_state *gts) { … }
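/*
 * Illustrative sketch only: callers bracket CCH updates with the "delresp"
 * lock above. A minimal (hypothetical) update path:
 *
 *	if (!trylock_cch_handle(cch))
 *		return -EBUSY;		(contended; the caller retries later)
 *	... modify the context configuration handle ...
 *	unlock_cch_handle(cch);
 *
 * lock_cch_handle() is the unconditional variant for paths that must not
 * fail to acquire the handle.
 */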
/*
 * The following are for Nehalem-EX. A more general scheme is needed for
 * future processors.
 */
#define UV_MAX_INT_CORES		…
#define uv_cpu_socket_number(p)		…
#define uv_cpu_ht_number(p)		…
#define uv_cpu_core_number(p)		…

/*-----------------------------------------------------------------------------
 * Function prototypes & externs
 */
struct gru_unload_context_req;

extern const struct vm_operations_struct gru_vm_ops;
extern struct device *grudev;

extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
				int tsid);
extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
				int tsid);
extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
				int tsid);
extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
extern void gru_load_context(struct gru_thread_state *gts);
extern void gru_steal_context(struct gru_thread_state *gts);
extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
extern int gru_update_cch(struct gru_thread_state *gts);
extern void gts_drop(struct gru_thread_state *gts);
extern void gru_tgh_flush_init(struct gru_state *gru);
extern int gru_kservices_init(void);
extern void gru_kservices_exit(void);
extern irqreturn_t gru0_intr(int irq, void *dev_id);
extern irqreturn_t gru1_intr(int irq, void *dev_id);
extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
extern int gru_dump_chiplet_request(unsigned long arg);
extern long gru_get_gseg_statistics(unsigned long arg);
extern int gru_handle_user_call_os(unsigned long address);
extern int gru_user_flush_tlb(unsigned long arg);
extern int gru_user_unload_context(unsigned long arg);
extern int gru_get_exception_detail(unsigned long arg);
extern int gru_set_context_option(unsigned long address);
extern int gru_check_context_placement(struct gru_thread_state *gts);
extern int gru_cpu_fault_map_id(void);
extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
extern void gru_flush_all_tlb(struct gru_state *gru);
extern int gru_proc_init(void);
extern void gru_proc_exit(void);

extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
		int cbr_au_count, int dsr_au_count,
		unsigned char tlb_preload_count, int options, int tsid);
extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
		int cbr_au_count, signed char *cbmap);
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
		int dsr_au_count, signed char *dsmap);
extern vm_fault_t gru_fault(struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);

extern int gru_ktest(unsigned long arg);
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
					unsigned long len);

extern unsigned long gru_options;

#endif /* __GRUTABLES_H__ */