// SPDX-License-Identifier: GPL-2.0 or MIT /* Copyright 2023 Collabora ltd. */ #include #include #include #include "panthor_device.h" #include "panthor_gem.h" #include "panthor_heap.h" #include "panthor_mmu.h" #include "panthor_regs.h" /* * The GPU heap context is an opaque structure used by the GPU to track the * heap allocations. The driver should only touch it to initialize it (zero all * fields). Because the CPU and GPU can both access this structure it is * required to be GPU cache line aligned. */ #define HEAP_CONTEXT_SIZE 32 /** * struct panthor_heap_chunk_header - Heap chunk header */ struct panthor_heap_chunk_header { /** * @next: Next heap chunk in the list. * * This is a GPU VA. */ u64 next; /** @unknown: MBZ. */ u32 unknown[14]; }; /** * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. */ struct panthor_heap_chunk { /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ struct list_head node; /** @bo: Buffer object backing the heap chunk. */ struct panthor_kernel_bo *bo; }; /** * struct panthor_heap - Structure used to manage tiler heap contexts. */ struct panthor_heap { /** @chunks: List containing all heap chunks allocated so far. */ struct list_head chunks; /** @lock: Lock protecting insertion in the chunks list. */ struct mutex lock; /** @chunk_size: Size of each chunk. */ u32 chunk_size; /** @max_chunks: Maximum number of chunks. */ u32 max_chunks; /** * @target_in_flight: Number of in-flight render passes after which * we'd let the FW wait for fragment job to finish instead of allocating new chunks. */ u32 target_in_flight; /** @chunk_count: Number of heap chunks currently allocated. */ u32 chunk_count; }; #define MAX_HEAPS_PER_POOL 128 /** * struct panthor_heap_pool - Pool of heap contexts * * The pool is attached to a panthor_file and can't be shared across processes. */ struct panthor_heap_pool { /** @refcount: Reference count. */ struct kref refcount; /** @ptdev: Device. */ struct panthor_device *ptdev; /** @vm: VM this pool is bound to. */ struct panthor_vm *vm; /** @lock: Lock protecting access to @xa. */ struct rw_semaphore lock; /** @xa: Array storing panthor_heap objects. */ struct xarray xa; /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ struct panthor_kernel_bo *gpu_contexts; }; static int panthor_heap_ctx_stride(struct panthor_device *ptdev) { u32 l2_features = ptdev->gpu_info.l2_features; u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); } static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) { return panthor_heap_ctx_stride(pool->ptdev) * id; } static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) { return pool->gpu_contexts->kmap + panthor_get_heap_ctx_offset(pool, id); } static void panthor_free_heap_chunk(struct panthor_vm *vm, struct panthor_heap *heap, struct panthor_heap_chunk *chunk) { mutex_lock(&heap->lock); list_del(&chunk->node); heap->chunk_count--; mutex_unlock(&heap->lock); panthor_kernel_bo_destroy(chunk->bo); kfree(chunk); } static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, struct panthor_vm *vm, struct panthor_heap *heap, bool initial_chunk) { struct panthor_heap_chunk *chunk; struct panthor_heap_chunk_header *hdr; int ret; chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); if (!chunk) return -ENOMEM; chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, PANTHOR_VM_KERNEL_AUTO_VA); if (IS_ERR(chunk->bo)) { ret = PTR_ERR(chunk->bo); goto err_free_chunk; } ret = panthor_kernel_bo_vmap(chunk->bo); if (ret) goto err_destroy_bo; hdr = chunk->bo->kmap; memset(hdr, 0, sizeof(*hdr)); if (initial_chunk && !list_empty(&heap->chunks)) { struct panthor_heap_chunk *prev_chunk; u64 prev_gpuva; prev_chunk = list_first_entry(&heap->chunks, struct panthor_heap_chunk, node); prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | (heap->chunk_size >> 12); } panthor_kernel_bo_vunmap(chunk->bo); mutex_lock(&heap->lock); list_add(&chunk->node, &heap->chunks); heap->chunk_count++; mutex_unlock(&heap->lock); return 0; err_destroy_bo: panthor_kernel_bo_destroy(chunk->bo); err_free_chunk: kfree(chunk); return ret; } static void panthor_free_heap_chunks(struct panthor_vm *vm, struct panthor_heap *heap) { struct panthor_heap_chunk *chunk, *tmp; list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) panthor_free_heap_chunk(vm, heap, chunk); } static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, struct panthor_vm *vm, struct panthor_heap *heap, u32 chunk_count) { int ret; u32 i; for (i = 0; i < chunk_count; i++) { ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); if (ret) return ret; } return 0; } static int panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) { struct panthor_heap *heap; heap = xa_erase(&pool->xa, handle); if (!heap) return -EINVAL; panthor_free_heap_chunks(pool->vm, heap); mutex_destroy(&heap->lock); kfree(heap); return 0; } /** * panthor_heap_destroy() - Destroy a heap context * @pool: Pool this context belongs to. * @handle: Handle returned by panthor_heap_create(). */ int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) { int ret; down_write(&pool->lock); ret = panthor_heap_destroy_locked(pool, handle); up_write(&pool->lock); return ret; } /** * panthor_heap_create() - Create a heap context * @pool: Pool to instantiate the heap context from. * @initial_chunk_count: Number of chunk allocated at initialization time. * Must be at least 1. * @chunk_size: The size of each chunk. Must be page-aligned and lie in the * [128k:8M] range. * @max_chunks: Maximum number of chunks that can be allocated. * @target_in_flight: Maximum number of in-flight render passes. * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap * context. * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk * assigned to the heap context. * * Return: a positive handle on success, a negative error otherwise. */ int panthor_heap_create(struct panthor_heap_pool *pool, u32 initial_chunk_count, u32 chunk_size, u32 max_chunks, u32 target_in_flight, u64 *heap_ctx_gpu_va, u64 *first_chunk_gpu_va) { struct panthor_heap *heap; struct panthor_heap_chunk *first_chunk; struct panthor_vm *vm; int ret = 0; u32 id; if (initial_chunk_count == 0) return -EINVAL; if (initial_chunk_count > max_chunks) return -EINVAL; if (!IS_ALIGNED(chunk_size, PAGE_SIZE) || chunk_size < SZ_128K || chunk_size > SZ_8M) return -EINVAL; down_read(&pool->lock); vm = panthor_vm_get(pool->vm); up_read(&pool->lock); /* The pool has been destroyed, we can't create a new heap. */ if (!vm) return -EINVAL; heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) { ret = -ENOMEM; goto err_put_vm; } mutex_init(&heap->lock); INIT_LIST_HEAD(&heap->chunks); heap->chunk_size = chunk_size; heap->max_chunks = max_chunks; heap->target_in_flight = target_in_flight; ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, initial_chunk_count); if (ret) goto err_free_heap; first_chunk = list_first_entry(&heap->chunks, struct panthor_heap_chunk, node); *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); down_write(&pool->lock); /* The pool has been destroyed, we can't create a new heap. */ if (!pool->vm) { ret = -EINVAL; } else { ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL); if (!ret) { void *gpu_ctx = panthor_get_heap_ctx(pool, id); memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + panthor_get_heap_ctx_offset(pool, id); } } up_write(&pool->lock); if (ret) goto err_free_heap; panthor_vm_put(vm); return id; err_free_heap: panthor_free_heap_chunks(pool->vm, heap); mutex_destroy(&heap->lock); kfree(heap); err_put_vm: panthor_vm_put(vm); return ret; } /** * panthor_heap_return_chunk() - Return an unused heap chunk * @pool: The pool this heap belongs to. * @heap_gpu_va: The GPU address of the heap context. * @chunk_gpu_va: The chunk VA to return. * * This function is used when a chunk allocated with panthor_heap_grow() * couldn't be linked to the heap context through the FW interface because * the group requesting the allocation was scheduled out in the meantime. */ int panthor_heap_return_chunk(struct panthor_heap_pool *pool, u64 heap_gpu_va, u64 chunk_gpu_va) { u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; struct panthor_heap *heap; int ret; if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) return -EINVAL; down_read(&pool->lock); heap = xa_load(&pool->xa, heap_id); if (!heap) { ret = -EINVAL; goto out_unlock; } chunk_gpu_va &= GENMASK_ULL(63, 12); mutex_lock(&heap->lock); list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { removed = chunk; list_del(&chunk->node); heap->chunk_count--; break; } } mutex_unlock(&heap->lock); if (removed) { panthor_kernel_bo_destroy(chunk->bo); kfree(chunk); ret = 0; } else { ret = -EINVAL; } out_unlock: up_read(&pool->lock); return ret; } /** * panthor_heap_grow() - Make a heap context grow. * @pool: The pool this heap belongs to. * @heap_gpu_va: The GPU address of the heap context. * @renderpasses_in_flight: Number of render passes currently in-flight. * @pending_frag_count: Number of fragment jobs waiting for execution/completion. * @new_chunk_gpu_va: Pointer used to return the chunk VA. * * Return: * - 0 if a new heap was allocated * - -ENOMEM if the tiler context reached the maximum number of chunks * or if too many render passes are in-flight * or if the allocation failed * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid */ int panthor_heap_grow(struct panthor_heap_pool *pool, u64 heap_gpu_va, u32 renderpasses_in_flight, u32 pending_frag_count, u64 *new_chunk_gpu_va) { u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); struct panthor_heap_chunk *chunk; struct panthor_heap *heap; int ret; if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) return -EINVAL; down_read(&pool->lock); heap = xa_load(&pool->xa, heap_id); if (!heap) { ret = -EINVAL; goto out_unlock; } /* If we reached the target in-flight render passes, or if we * reached the maximum number of chunks, let the FW figure another way to * find some memory (wait for render passes to finish, or call the exception * handler provided by the userspace driver, if any). */ if (renderpasses_in_flight > heap->target_in_flight || heap->chunk_count >= heap->max_chunks) { ret = -ENOMEM; goto out_unlock; } /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, * which goes through the blocking allocation path. Ultimately, we * want a non-blocking allocation, so we can immediately report to the * FW when the system is running out of memory. In that case, the FW * can call a user-provided exception handler, which might try to free * some tiler memory by issuing an intermediate fragment job. If the * exception handler can't do anything, it will flag the queue as * faulty so the job that triggered this tiler chunk allocation and all * further jobs in this queue fail immediately instead of having to * wait for the job timeout. */ ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); if (ret) goto out_unlock; chunk = list_first_entry(&heap->chunks, struct panthor_heap_chunk, node); *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | (heap->chunk_size >> 12); ret = 0; out_unlock: up_read(&pool->lock); return ret; } static void panthor_heap_pool_release(struct kref *refcount) { struct panthor_heap_pool *pool = container_of(refcount, struct panthor_heap_pool, refcount); xa_destroy(&pool->xa); kfree(pool); } /** * panthor_heap_pool_put() - Release a heap pool reference * @pool: Pool to release the reference on. Can be NULL. */ void panthor_heap_pool_put(struct panthor_heap_pool *pool) { if (pool) kref_put(&pool->refcount, panthor_heap_pool_release); } /** * panthor_heap_pool_get() - Get a heap pool reference * @pool: Pool to get the reference on. Can be NULL. * * Return: @pool. */ struct panthor_heap_pool * panthor_heap_pool_get(struct panthor_heap_pool *pool) { if (pool) kref_get(&pool->refcount); return pool; } /** * panthor_heap_pool_create() - Create a heap pool * @ptdev: Device. * @vm: The VM this heap pool will be attached to. * * Heap pools might contain up to 128 heap contexts, and are per-VM. * * Return: A valid pointer on success, a negative error code otherwise. */ struct panthor_heap_pool * panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) { size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * panthor_heap_ctx_stride(ptdev), 4096); struct panthor_heap_pool *pool; int ret = 0; pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) return ERR_PTR(-ENOMEM); /* We want a weak ref here: the heap pool belongs to the VM, so we're * sure that, as long as the heap pool exists, the VM exists too. */ pool->vm = vm; pool->ptdev = ptdev; init_rwsem(&pool->lock); xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); kref_init(&pool->refcount); pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, DRM_PANTHOR_BO_NO_MMAP, DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, PANTHOR_VM_KERNEL_AUTO_VA); if (IS_ERR(pool->gpu_contexts)) { ret = PTR_ERR(pool->gpu_contexts); goto err_destroy_pool; } ret = panthor_kernel_bo_vmap(pool->gpu_contexts); if (ret) goto err_destroy_pool; return pool; err_destroy_pool: panthor_heap_pool_destroy(pool); return ERR_PTR(ret); } /** * panthor_heap_pool_destroy() - Destroy a heap pool. * @pool: Pool to destroy. * * This function destroys all heap contexts and their resources. Thus * preventing any use of the heap context or the chunk attached to them * after that point. * * If the GPU still has access to some heap contexts, a fault should be * triggered, which should flag the command stream groups using these * context as faulty. * * The heap pool object is only released when all references to this pool * are released. */ void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) { struct panthor_heap *heap; unsigned long i; if (!pool) return; down_write(&pool->lock); xa_for_each(&pool->xa, i, heap) drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); if (!IS_ERR_OR_NULL(pool->gpu_contexts)) panthor_kernel_bo_destroy(pool->gpu_contexts); /* Reflects the fact the pool has been destroyed. */ pool->vm = NULL; up_write(&pool->lock); panthor_heap_pool_put(pool); }