// SPDX-License-Identifier: GPL-2.0 OR MIT /* * Copyright 2015-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/pci.h> #include <linux/acpi.h> #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" #include "amdgpu.h" #include "amdgpu_amdkfd.h" /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs * used in the CRAT. 
*/ static uint32_t gpu_processor_id_low = …; /* Return the next available gpu_processor_id and increment it for next GPU * @total_cu_count - Total CUs present in the GPU including ones * masked off */ static inline unsigned int get_and_inc_gpu_processor_id( unsigned int total_cu_count) { … } static struct kfd_gpu_cache_info kaveri_cache_info[] = …; static struct kfd_gpu_cache_info carrizo_cache_info[] = …; #define hawaii_cache_info … #define tonga_cache_info … #define fiji_cache_info … #define polaris10_cache_info … #define polaris11_cache_info … #define polaris12_cache_info … #define vegam_cache_info … /* NOTE: L1 cache information has been updated and L2/L3 * cache information has been added for Vega10 and * newer ASICs. The unit for cache_size is KiB. * In the future, the cache details must be checked * and updated for every new ASIC. */ static struct kfd_gpu_cache_info vega10_cache_info[] = …; static struct kfd_gpu_cache_info raven_cache_info[] = …; static struct kfd_gpu_cache_info renoir_cache_info[] = …; static struct kfd_gpu_cache_info vega12_cache_info[] = …; static struct kfd_gpu_cache_info vega20_cache_info[] = …; static struct kfd_gpu_cache_info aldebaran_cache_info[] = …; static struct kfd_gpu_cache_info navi10_cache_info[] = …; static struct kfd_gpu_cache_info vangogh_cache_info[] = …; static struct kfd_gpu_cache_info navi14_cache_info[] = …; static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = …; static struct kfd_gpu_cache_info navy_flounder_cache_info[] = …; static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = …; static struct kfd_gpu_cache_info beige_goby_cache_info[] = …; static struct kfd_gpu_cache_info yellow_carp_cache_info[] = …; static struct kfd_gpu_cache_info gfx1037_cache_info[] = …; static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = …; static struct kfd_gpu_cache_info dummy_cache_info[] = …; static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { … } static void
kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { … } /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct * topology device present in the device_list */ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, struct list_head *device_list) { … } static struct kfd_mem_properties * find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width, struct kfd_topology_device *dev) { … } /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct * topology device present in the device_list */ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, struct list_head *device_list) { … } /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct * topology device present in the device_list */ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, struct list_head *device_list) { … } /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct * topology device present in the device_list */ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, struct list_head *device_list) { … } /* kfd_parse_subtype - parse subtypes and attach it to correct topology device * present in the device_list * @sub_type_hdr - subtype section of crat_image * @device_list - list of topology devices present in this crat_image */ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, struct list_head *device_list) { … } /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT * create a kfd_topology_device and add in to device_list. 
Also parse * CRAT subtypes and attach it to appropriate kfd_topology_device * @crat_image - input image containing CRAT * @device_list - [OUT] list of kfd_topology_device generated after * parsing crat_image * @proximity_domain - Proximity domain of the first device in the table * * Return - 0 if successful else -ve value */ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, uint32_t proximity_domain) { … } static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, struct kfd_gpu_cache_info *pcache_info) { … } static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, struct kfd_gpu_cache_info *pcache_info) { … } int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { … } /* Memory required to create Virtual CRAT. * Since there is no easy way to predict the amount of memory required, the * following amount is allocated for GPU Virtual CRAT. This is * expected to cover all known conditions. But to be safe additional check * is put in the code to ensure we don't overwrite. 
*/ #define VCRAT_SIZE_FOR_GPU … /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node * * @numa_node_id: CPU NUMA node id * @avail_size: Available size in the memory * @sub_type_hdr: Memory into which compute info will be filled in * * Return 0 if successful else return -ve value */ static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, int proximity_domain, struct crat_subtype_computeunit *sub_type_hdr) { … } /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node * * @numa_node_id: CPU NUMA node id * @avail_size: Available size in the memory * @sub_type_hdr: Memory into which memory info will be filled in * * Return 0 if successful else return -ve value */ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, int proximity_domain, struct crat_subtype_memory *sub_type_hdr) { … } #ifdef CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) { … } #endif /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU * * @pcrat_image: Fill in VCRAT for CPU * @size: [IN] allocated size of crat_image.
* [OUT] actual size of data filled in crat_image */ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) { … } static int kfd_fill_gpu_memory_affinity(int *avail_size, struct kfd_node *kdev, uint8_t type, uint64_t size, struct crat_subtype_memory *sub_type_hdr, uint32_t proximity_domain, const struct kfd_local_mem_info *local_mem_info) { … } #ifdef CONFIG_ACPI_NUMA static void kfd_find_numa_node_in_srat(struct kfd_node *kdev) { … } #endif #define KFD_CRAT_INTRA_SOCKET_WEIGHT … #define KFD_CRAT_XGMI_WEIGHT … /* kfd_fill_gpu_direct_io_link_to_cpu - Fill in direct io link from GPU * to its NUMA node * @avail_size: Available size in the memory * @kdev - [IN] GPU device * @sub_type_hdr: Memory into which io link info will be filled in * @proximity_domain - proximity domain of the GPU node * * Return 0 if successful else return -ve value */ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, struct kfd_node *kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain) { … } static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, struct kfd_node *kdev, struct kfd_node *peer_kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain_from, uint32_t proximity_domain_to) { … } /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU * * @pcrat_image: Fill in VCRAT for GPU * @size: [IN] allocated size of crat_image.
* [OUT] actual size of data filled in crat_image */ static int kfd_create_vcrat_image_gpu(void *pcrat_image, size_t *size, struct kfd_node *kdev, uint32_t proximity_domain) { … } /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and * creates a Virtual CRAT (VCRAT) image * * NOTE: Call kfd_destroy_crat_image to free CRAT image memory * * @crat_image: VCRAT image created because ACPI does not have a * CRAT for this device * @size: [OUT] size of virtual crat_image * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device * COMPUTE_UNIT_GPU - Create VCRAT for GPU * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU * -- this option is not currently implemented. * The assumption is that all AMD APUs will have CRAT * @kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU * * Return 0 if successful else return -ve value */ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, int flags, struct kfd_node *kdev, uint32_t proximity_domain) { … } /* kfd_destroy_crat_image * * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) * */ void kfd_destroy_crat_image(void *crat_image) { … }