// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pci-p2pdma.h>

MODULE_IMPORT_NS(DMA_BUF);

#define HL_MMU_DEBUG

/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
#define DRAM_POOL_PAGE_SIZE		SZ_8M

#define MEM_HANDLE_INVALID		ULONG_MAX

static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
			struct hl_mem_in *args, u64 *handle);

static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
{}

/*
 * The va ranges in the context object contain a list with the available
 * device virtual memory.
 * There is one range for host allocations and one for DRAM allocations.
 *
 * On initialization each range contains one chunk covering all of its
 * available virtual range, which is half of the total device virtual range.
 *
 * On each mapping of physical pages, a suitable virtual range chunk (with a
 * minimum size) is selected from the list. If the chunk size equals the
 * requested size, the chunk is returned. Otherwise, the chunk is split into
 * two chunks - one to return as result and a remainder to stay in the list.
 *
 * On each unmapping of a virtual address, the relevant virtual chunk is
 * returned to the list. The chunk is added to the list and, if its edges match
 * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
 * the chunks are merged.
 *
 * On finish, the list is checked to contain only one chunk covering the whole
 * relevant virtual range (which is half of the device's total virtual range).
 * If not (meaning not all mappings were unmapped), a warning is printed.
 */
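
/*
 * A minimal sketch of the split described above (illustration only, not the
 * driver's actual allocation path). It assumes the hl_vm_va_block layout from
 * habanalabs.h (node/start/end/size) and that the caller already picked a
 * chunk with chunk->size >= size:
 */
static inline u64 va_chunk_carve(struct hl_vm_va_block *chunk, u64 size)
{
	u64 res = chunk->start;

	/*
	 * Shrink the remainder in place; it stays on the list. The real code
	 * would unlink the block entirely if the remainder size reaches zero.
	 */
	chunk->start += size;
	chunk->size -= size;

	return res;
}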

/**
 * alloc_device_memory() - allocate device memory.
 * @ctx: pointer to the context structure.
 * @args: host parameters containing the requested size.
 * @ret_handle: result handle.
 *
 * This function does the following:
 * - Allocate the requested size rounded up to 'dram_page_size' pages.
 * - Return unique handle for later map/unmap/free.
 */
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
				u32 *ret_handle)
{}
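
/*
 * The rounding step above, sketched (illustration only): translate the
 * requested byte size into a whole number of DRAM pages. DIV_ROUND_UP()
 * also covers non-power-of-2 DRAM page sizes such as 32M/40M/48M.
 */
static inline u64 size_to_dram_pages(u64 size, u32 dram_page_size)
{
	return DIV_ROUND_UP(size, dram_page_size);
}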

/**
 * dma_map_host_va() - DMA mapping of the given host virtual address.
 * @hdev: habanalabs device structure.
 * @addr: the host virtual address of the memory area.
 * @size: the size of the memory area.
 * @p_userptr: pointer to result userptr structure.
 *
 * This function does the following:
 * - Allocate userptr structure.
 * - Pin the given host memory using the userptr structure.
 * - Perform DMA mapping to have the DMA addresses of the pages.
 */
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
				struct hl_userptr **p_userptr)
{}
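
/*
 * The flow above reduces to three hedged steps (sketch; error handling
 * elided, and hl_dma_map_sgtable() is assumed to be the driver's sg-table
 * mapping helper, whose exact name may differ across driver versions):
 *
 *	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
 *	rc = hl_pin_host_memory(hdev, addr, size, userptr);
 *	rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
 */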

/**
 * dma_unmap_host_va() - DMA unmapping of the given host virtual address.
 * @hdev: habanalabs device structure.
 * @userptr: userptr to free.
 *
 * This function does the following:
 * - Unpins the physical pages.
 * - Frees the userptr structure.
 */
static void dma_unmap_host_va(struct hl_device *hdev,
				struct hl_userptr *userptr)
{}

/**
 * dram_pg_pool_do_release() - free DRAM pages pool
 * @ref: pointer to reference object.
 *
 * This function does the following:
 * - Frees the idr structure of physical pages handles.
 * - Frees the generic pool of DRAM physical pages.
 */
static void dram_pg_pool_do_release(struct kref *ref)
{}

/**
 * free_phys_pg_pack() - free physical page pack.
 * @hdev: habanalabs device structure.
 * @phys_pg_pack: physical page pack to free.
 *
 * This function does the following:
 * - For DRAM memory only
 *   - iterate over the pack, free each physical block structure by
 *     returning it to the general pool.
 * - Free the hl_vm_phys_pg_pack structure.
 */
static void free_phys_pg_pack(struct hl_device *hdev,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{}

/**
 * free_device_memory() - free device memory.
 * @ctx: pointer to the context structure.
 * @args: host parameters containing the handle of the device memory to free.
 *
 * This function does the following:
 * - Free the device memory related to the given handle.
 */
static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
{}

/**
 * clear_va_list_locked() - free virtual addresses list.
 * @hdev: habanalabs device structure.
 * @va_list: list of virtual addresses to free.
 *
 * This function does the following:
 * - Iterate over the list and free each virtual addresses block.
 *
 * This function should be called only when va_list lock is taken.
 */
static void clear_va_list_locked(struct hl_device *hdev,
		struct list_head *va_list)
{}

/**
 * print_va_list_locked() - print virtual addresses list.
 * @hdev: habanalabs device structure.
 * @va_list: list of virtual addresses to print.
 *
 * This function does the following:
 * - Iterate over the list and print each virtual addresses block.
 *
 * This function should be called only when va_list lock is taken.
 */
static void print_va_list_locked(struct hl_device *hdev,
		struct list_head *va_list)
{}

/**
 * merge_va_blocks_locked() - merge a virtual block if possible.
 * @hdev: pointer to the habanalabs device structure.
 * @va_list: pointer to the virtual addresses block list.
 * @va_block: virtual block to merge with adjacent blocks.
 *
 * This function does the following:
 * - Merge the given blocks with the adjacent blocks if their virtual ranges
 *   create a contiguous virtual range.
 *
 * This function should be called only when va_list lock is taken.
 */
static void merge_va_blocks_locked(struct hl_device *hdev,
		struct list_head *va_list, struct hl_vm_va_block *va_block)
{}
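
/*
 * Sketch of the adjacency test behind the merge (illustration only): the
 * list is kept sorted by address and end addresses are inclusive, so two
 * blocks can be fused iff the first ends exactly one byte before the second
 * starts.
 */
static inline bool va_blocks_mergeable(const struct hl_vm_va_block *prev,
		const struct hl_vm_va_block *next)
{
	return prev->end + 1 == next->start;
}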

/**
 * add_va_block_locked() - add a virtual block to the virtual addresses list.
 * @hdev: pointer to the habanalabs device structure.
 * @va_list: pointer to the virtual addresses block list.
 * @start: start virtual address.
 * @end: end virtual address.
 *
 * This function does the following:
 * - Add the given block to the virtual blocks list and merge with other blocks
 *   if a contiguous virtual block can be created.
 *
 * This function should be called only when va_list lock is taken.
 */
static int add_va_block_locked(struct hl_device *hdev,
		struct list_head *va_list, u64 start, u64 end)
{}

/**
 * add_va_block() - wrapper for add_va_block_locked.
 * @hdev: pointer to the habanalabs device structure.
 * @va_range: pointer to the virtual addresses range object.
 * @start: start virtual address.
 * @end: end virtual address.
 *
 * This function does the following:
 * - Takes the list lock and calls add_va_block_locked.
 */
static inline int add_va_block(struct hl_device *hdev,
		struct hl_va_range *va_range, u64 start, u64 end)
{}

/**
 * is_hint_crossing_range() - check if a hint address crosses a reserved range.
 * @range_type: virtual space range type.
 * @start_addr: start virtual address.
 * @size: block size.
 * @prop: asic properties structure to retrieve reserved ranges from.
 */
static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
		u64 start_addr, u32 size, struct asic_fixed_properties *prop)
{}
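
/*
 * The underlying check is a classic interval-overlap test, sketched here
 * (illustration only; the reserved-range bounds are hypothetical stand-ins
 * for the fields held in asic_fixed_properties):
 */
static inline bool hint_overlaps_range(u64 start_addr, u32 size,
		u64 rsvd_start, u64 rsvd_end)
{
	u64 end_addr = start_addr + size - 1;

	/* the ranges overlap iff neither lies wholly before the other */
	return !(end_addr < rsvd_start || start_addr > rsvd_end);
}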

/**
 * get_va_block() - get a virtual block for the given size and alignment.
 *
 * @hdev: pointer to the habanalabs device structure.
 * @va_range: pointer to the virtual addresses range.
 * @size: requested block size.
 * @hint_addr: hint for requested address by the user.
 * @va_block_align: required alignment of the virtual block start address.
 * @range_type: va range type (host, dram).
 * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT.
 *
 * This function does the following:
 * - Iterate on the virtual block list to find a suitable virtual block for the
 *   given size, hint address and alignment.
 * - Reserve the requested block and update the list.
 * - Return the start address of the virtual block.
 */
static u64 get_va_block(struct hl_device *hdev,
				struct hl_va_range *va_range,
				u64 size, u64 hint_addr, u32 va_block_align,
				enum hl_va_range_type range_type,
				u32 flags)
{}
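
/*
 * Sketch of the start-address alignment applied while scanning the list
 * (illustration only). ALIGN() would suffice for power-of-2 alignments;
 * the DIV_ROUND_UP() form also handles arbitrary va_block_align values:
 */
static inline u64 align_va_start(u64 start, u32 va_block_align)
{
	return DIV_ROUND_UP(start, va_block_align) * va_block_align;
}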

/**
 * hl_reserve_va_block() - reserve a virtual block of a given size.
 * @hdev: pointer to the habanalabs device structure.
 * @ctx: current context
 * @type: virtual addresses range type.
 * @size: requested block size.
 * @alignment: required alignment in bytes of the virtual block start address,
 *             0 means no alignment.
 *
 * This function does the following:
 * - Iterate on the virtual block list to find a suitable virtual block for the
 *   given size and alignment.
 * - Reserve the requested block and update the list.
 * - Return the start address of the virtual block.
 */
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		enum hl_va_range_type type, u64 size, u32 alignment)
{}

/**
 * hl_get_va_range_type() - get va_range type for the given address and size.
 * @ctx: context to fetch va_range from.
 * @address: the start address of the area we want to validate.
 * @size: the size in bytes of the area we want to validate.
 * @type: returned va_range type.
 *
 * Return: 0 if the area is inside a valid range, a negative error code
 *         otherwise.
 */
static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
			enum hl_va_range_type *type)
{}

/**
 * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
 * @hdev: pointer to the habanalabs device structure
 * @ctx: pointer to the context structure.
 * @start_addr: start virtual address.
 * @size: number of bytes to unreserve.
 *
 * This function does the following:
 * - Takes the list lock and calls add_va_block_locked.
 */
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
		u64 start_addr, u64 size)
{}

/**
 * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
 *                                    memory
 * @ctx: pointer to the context structure.
 * @userptr: userptr to initialize from.
 * @pphys_pg_pack: result pointer.
 * @force_regular_page: tell the function to ignore huge page optimization,
 *                      even if possible. Needed for cases where the device VA
 *                      is allocated before we know the composition of the
 *                      physical pages.
 *
 * This function does the following:
 * - Create a physical page pack from the physical pages related to the given
 *   virtual block.
 */
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
				struct hl_userptr *userptr,
				struct hl_vm_phys_pg_pack **pphys_pg_pack,
				bool force_regular_page)
{}

/**
 * map_phys_pg_pack() - maps the physical page pack.
 * @ctx: pointer to the context structure.
 * @vaddr: start address of the virtual area to map from.
 * @phys_pg_pack: the pack of physical pages to map to.
 *
 * This function does the following:
 * - Maps each chunk of virtual memory to matching physical chunk.
 * - Stores number of successful mappings in the given argument.
 * - Returns 0 on success, error code otherwise.
 */
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{}
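
/*
 * The mapping loop sketched in pseudocode (hedged: variable names are
 * illustrative, and hl_mmu_map_page() is assumed to be the per-page MMU
 * mapping helper declared in habanalabs.h, whose final argument requests a
 * PTE flush, done here only on the last page):
 *
 *	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 *		rc = hl_mmu_map_page(ctx, next_vaddr, phys_pg_pack->pages[i],
 *				page_size, (i + 1) == phys_pg_pack->npages);
 *		if (rc)
 *			goto err_unmap_done_pages;
 *		next_vaddr += page_size;
 *	}
 */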

/**
 * unmap_phys_pg_pack() - unmaps the physical page pack.
 * @ctx: pointer to the context structure.
 * @vaddr: start address of the virtual area to unmap.
 * @phys_pg_pack: the pack of physical pages to unmap.
 */
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{}

/**
 * map_device_va() - map the given memory.
 * @ctx: pointer to the context structure.
 * @args: host parameters with handle/host virtual address.
 * @device_addr: pointer to result device virtual address.
 *
 * This function does the following:
 * - If given a physical device memory handle, map to a device virtual block
 *   and return the start address of this block.
 * - If given a host virtual address and size, find the related physical pages,
 *   map a device virtual block to these pages and return the start address of
 *   this block.
 */
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr)
{}

/* Should be called while the context's mem_hash_lock is taken */
static struct hl_vm_hash_node *get_vm_hash_node_locked(struct hl_ctx *ctx, u64 vaddr)
{}

/**
 * unmap_device_va() - unmap the given device virtual address.
 * @ctx: pointer to the context structure.
 * @args: host parameters with device virtual address to unmap.
 * @ctx_free: true if in context free flow, false otherwise.
 *
 * This function does the following:
 * - unmap the physical pages related to the given virtual address.
 * - return the device virtual block to the virtual block list.
 */
static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
				bool ctx_free)
{}

static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
{}

static void hw_block_vm_close(struct vm_area_struct *vma)
{}

static const struct vm_operations_struct hw_block_vm_ops = {
	.close = hw_block_vm_close
};

/**
 * hl_hw_block_mmap() - mmap a hw block to user.
 * @hpriv: pointer to the private data of the fd
 * @vma: pointer to vm_area_struct of the process
 *
 * The driver increments the context reference count for every HW block
 * mapped, in order to prevent the user from closing the FD without
 * unmapping first.
 */
int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{}

static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
			struct device *dev, enum dma_data_direction dir)
{}
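
/*
 * Sketch of filling one scatterlist entry with a device BAR region
 * (illustration only): a PCI BAR address is DMA-mapped as an MMIO resource
 * rather than through struct page, so the entry carries only a DMA address
 * and length.
 */
static inline int sg_from_bar_region(struct scatterlist *sg, struct device *dev,
		u64 bar_address, u64 chunk_size, enum dma_data_direction dir)
{
	dma_addr_t addr;

	addr = dma_map_resource(dev, bar_address, chunk_size, dir,
				DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;

	/* no backing struct page; the chunk is assumed to fit unsigned int */
	sg_set_page(sg, NULL, (unsigned int)chunk_size, 0);
	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = chunk_size;

	return 0;
}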

static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
						u64 page_size, u64 exported_size, u64 offset,
						struct device *dev, enum dma_data_direction dir)
{}

static int hl_dmabuf_attach(struct dma_buf *dmabuf,
				struct dma_buf_attachment *attachment)
{}

static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
					enum dma_data_direction dir)
{}

static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
				  struct sg_table *sgt,
				  enum dma_data_direction dir)
{}

static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr)
{}

static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode)
{}

static void hl_release_dmabuf(struct dma_buf *dmabuf)
{}

static const struct dma_buf_ops habanalabs_dmabuf_ops = {
	.attach = hl_dmabuf_attach,
	.map_dma_buf = hl_map_dmabuf,
	.unmap_dma_buf = hl_unmap_dmabuf,
	.release = hl_release_dmabuf,
};

static int export_dmabuf(struct hl_ctx *ctx,
				struct hl_dmabuf_priv *hl_dmabuf,
				u64 total_size, int flags, int *dmabuf_fd)
{}
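
/*
 * Sketch of the core dma-buf export sequence (illustration only, not
 * necessarily the driver's exact flow; hl_dmabuf_priv is only carried as
 * the dma-buf's private data here):
 */
static inline int export_dmabuf_sketch(struct hl_dmabuf_priv *hl_dmabuf,
		u64 total_size, int flags, int *dmabuf_fd)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
	struct dma_buf *dmabuf;
	int fd;

	exp_info.ops = &habanalabs_dmabuf_ops;
	exp_info.size = total_size;
	exp_info.flags = flags;
	exp_info.priv = hl_dmabuf;

	dmabuf = dma_buf_export(&exp_info);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	fd = dma_buf_fd(dmabuf, flags);
	if (fd < 0) {
		/* dropping the last reference invokes the release op */
		dma_buf_put(dmabuf);
		return fd;
	}

	*dmabuf_fd = fd;

	return 0;
}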

static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
{}

static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr, u64 size)
{}

static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 size, u64 offset,
					struct hl_vm_phys_pg_pack *phys_pg_pack)
{}

static struct hl_vm_phys_pg_pack *get_phys_pg_pack_from_hash_node(struct hl_device *hdev,
							struct hl_vm_hash_node *hnode)
{}

/**
 * export_dmabuf_from_addr() - export a dma-buf object for the given memory
 *                             address and size.
 * @ctx: pointer to the context structure.
 * @addr: device address.
 * @size: size of device memory to export.
 * @offset: the offset into the buffer from which to start exporting.
 * @flags: DMA-BUF file/FD flags.
 * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
 *
 * Create and export a dma-buf object for an existing memory allocation inside
 * the device memory, and return a FD which is associated with the dma-buf
 * object.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 offset,
					int flags, int *dmabuf_fd)
{}

static void ts_buff_release(struct hl_mmap_mem_buf *buf)
{}

static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
{}

static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{}

static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
	.topic = "TS",
	.mem_id = HL_MMAP_TYPE_TS_BUFF,
	.mmap = hl_ts_mmap,
	.alloc = hl_ts_alloc_buf,
	.release = ts_buff_release,
};

/**
 * allocate_timestamps_buffers() - allocate timestamps buffers.
 * @hpriv: pointer to the private data of the fd.
 * @args: ioctl input.
 * @handle: user timestamp buffer handle as an output.
 *
 * This function allocates a timestamps buffer that will later be mapped to
 * the user, so the user can read the timestamps. In addition, it allocates
 * an extra buffer for registration management: since registration must not
 * fail due to an out-of-memory situation, a pool is prepared up front to
 * serve as user interrupt nodes, and nodes are picked from this pool instead
 * of being allocated dynamically during registration. The function also adds
 * a node to the mapping hash, which is used to map the user timestamps
 * buffer to the internal kernel timestamps buffer.
 */
static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
{}

int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{}

static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
				u32 npages, u64 start, u32 offset,
				struct hl_userptr *userptr)
{}

/**
 * hl_pin_host_memory() - pins a chunk of host memory.
 * @hdev: pointer to the habanalabs device structure.
 * @addr: the host virtual address of the memory area.
 * @size: the size of the memory area.
 * @userptr: pointer to hl_userptr structure.
 *
 * This function does the following:
 * - Pins the physical pages.
 * - Creates an SG list from those pages.
 */
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
					struct hl_userptr *userptr)
{}
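
/*
 * The pinning itself boils down to long-term user-page pinning plus an SG
 * table built over the pinned pages. A hedged sketch of the two core calls
 * (error handling and bounds checks elided; the driver may use different
 * variants of these APIs):
 *
 *	rc = pin_user_pages_fast(start, npages,
 *				 FOLL_WRITE | FOLL_LONGTERM, pages);
 *
 *	rc = sg_alloc_table_from_pages(userptr->sgt, pages, npages,
 *				       offset, size, GFP_KERNEL);
 */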

/**
 * hl_unpin_host_memory() - unpins a chunk of host memory.
 * @hdev: pointer to the habanalabs device structure.
 * @userptr: pointer to hl_userptr structure.
 *
 * This function does the following:
 * - Unpins the physical pages related to the host memory.
 * - Frees the SG list.
 */
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{}

/**
 * hl_userptr_delete_list() - clear userptr list.
 * @hdev: pointer to the habanalabs device structure.
 * @userptr_list: pointer to the list to clear.
 *
 * This function does the following:
 * - Iterates over the list and unpins the host memory and frees the userptr
 *   structure.
 */
void hl_userptr_delete_list(struct hl_device *hdev,
				struct list_head *userptr_list)
{}

/**
 * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
 * @hdev: pointer to the habanalabs device structure.
 * @addr: user address to check.
 * @size: user block size to check.
 * @userptr_list: pointer to the list to search in.
 * @userptr: pointer to userptr to check.
 *
 * This function does the following:
 * - Iterates over the list and checks whether the given userptr is in it,
 *   meaning it is pinned. If so, returns true; otherwise returns false.
 */
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				u32 size, struct list_head *userptr_list,
				struct hl_userptr **userptr)
{}
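
/*
 * Sketch of the lookup described above (illustration only; the addr/size/
 * job_node field names are taken as the hl_userptr layout in habanalabs.h):
 * a linear scan over the pinned-memory list, matching on exact address and
 * size.
 */
static inline bool userptr_list_find(struct list_head *userptr_list, u64 addr,
		u32 size, struct hl_userptr **userptr)
{
	list_for_each_entry((*userptr), userptr_list, job_node) {
		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
			return true;
	}

	return false;
}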

/**
 * va_range_init() - initialize virtual addresses range.
 * @hdev: pointer to the habanalabs device structure.
 * @va_ranges: pointer to va_ranges array.
 * @range_type: virtual address range type.
 * @start: range start address, inclusive.
 * @end: range end address, inclusive.
 * @page_size: page size for this va_range.
 *
 * This function does the following:
 * - Initializes the virtual addresses list of the given range with the given
 *   addresses.
 */
static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges,
				enum hl_va_range_type range_type, u64 start,
				u64 end, u32 page_size)
{}
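
/*
 * Conceptually this seeds the list with a single chunk spanning the whole
 * range, per the overview comment at the top of this file. A hedged sketch
 * (the real code also aligns start/end to page_size first; end is
 * inclusive, hence the +1 in the size):
 *
 *	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
 *	va_block->start = start;
 *	va_block->end = end;
 *	va_block->size = end - start + 1;
 *	list_add(&va_block->node, &va_range->list);
 */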

/**
 * va_range_fini() - clear a virtual addresses range.
 * @hdev: pointer to the habanalabs structure.
 * @va_range: pointer to virtual addresses range.
 *
 * This function does the following:
 * - Frees the virtual addresses block list and its lock.
 */
static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
{}

/**
 * vm_ctx_init_with_ranges() - initialize virtual memory for context.
 * @ctx: pointer to the habanalabs context structure.
 * @host_range_start: host virtual addresses range start.
 * @host_range_end: host virtual addresses range end.
 * @host_page_size: host page size.
 * @host_huge_range_start: host virtual addresses range start for memory
 *                         allocated with huge pages.
 * @host_huge_range_end: host virtual addresses range end for memory allocated
 *                        with huge pages.
 * @host_huge_page_size: host huge page size.
 * @dram_range_start: dram virtual addresses range start.
 * @dram_range_end: dram virtual addresses range end.
 * @dram_page_size: dram page size.
 *
 * This function initializes the following:
 * - MMU for context.
 * - Virtual address to area descriptor hashtable.
 * - Virtual block list of available virtual memory.
 */
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
					u64 host_range_start,
					u64 host_range_end,
					u32 host_page_size,
					u64 host_huge_range_start,
					u64 host_huge_range_end,
					u32 host_huge_page_size,
					u64 dram_range_start,
					u64 dram_range_end,
					u32 dram_page_size)
{}

int hl_vm_ctx_init(struct hl_ctx *ctx)
{}

/**
 * hl_vm_ctx_fini() - virtual memory teardown of context.
 * @ctx: pointer to the habanalabs context structure.
 *
 * This function performs teardown of the following:
 * - Virtual block list of available virtual memory.
 * - Virtual address to area descriptor hashtable.
 * - MMU for context.
 *
 * In addition this function does the following:
 * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
 *   hashtable should be empty as no valid mappings should exist at this
 *   point.
 * - Frees any existing physical page list from the idr which relates to the
 *   current context asid.
 * - This function checks the virtual block list for correctness. At this point
 *   the list should contain one element which describes the whole virtual
 *   memory range of the context. Otherwise, a warning is printed.
 */
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{}

/**
 * hl_vm_init() - initialize virtual memory module.
 * @hdev: pointer to the habanalabs device structure.
 *
 * This function initializes the following:
 * - MMU module.
 * - DRAM physical pages pool with DRAM_POOL_PAGE_SIZE allocation granularity.
 * - Idr for device memory allocation handles.
 */
int hl_vm_init(struct hl_device *hdev)
{}

/**
 * hl_vm_fini() - virtual memory module teardown.
 * @hdev: pointer to the habanalabs device structure.
 *
 * This function performs teardown of the following:
 * - Idr for device memory allocation handles.
 * - DRAM physical pages pool with DRAM_POOL_PAGE_SIZE allocation granularity.
 * - MMU module.
 */
void hl_vm_fini(struct hl_device *hdev)
{}

/**
 * hl_hw_block_mem_init() - HW block memory initialization.
 * @ctx: pointer to the habanalabs context structure.
 *
 * This function initializes the HW block virtual mapped addresses list and
 * its lock.
 */
void hl_hw_block_mem_init(struct hl_ctx *ctx)
{}

/**
 * hl_hw_block_mem_fini() - HW block memory teardown.
 * @ctx: pointer to the habanalabs context structure.
 *
 * This function clears the HW block virtual mapped addresses list and
 * destroys its lock.
 */
void hl_hw_block_mem_fini(struct hl_ctx *ctx)
{}