// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"

/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;

	rcu_read_lock();
	fault_param = rcu_dereference(param->fault_param);
	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
		fault_param = NULL;
	rcu_read_unlock();

	return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
	if (refcount_dec_and_test(&fault_param->users))
		kfree_rcu(fault_param, rcu);
}

static void __iopf_free_group(struct iopf_group *group)
{
	struct iopf_fault *iopf, *next;

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	/* Pair with iommu_report_device_fault(). */
	iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
	__iopf_free_group(group);
	kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);

/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
				struct iommu_fault *fault)
{
	struct iopf_fault *iopf;

	iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
	if (!iopf)
		return -ENOMEM;

	iopf->fault = *fault;

	mutex_lock(&fault_param->lock);
	list_add(&iopf->list, &fault_param->partial);
	mutex_unlock(&fault_param->lock);

	return 0;
}

static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
					   struct iopf_fault *evt,
					   struct iopf_group *abort_group)
{
	struct iopf_fault *iopf, *next;
	struct iopf_group *group;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * We always need to construct the group as we need it to abort
		 * the request at the driver if it can't be handled.
		 */
		group = abort_group;
	}

	group->fault_param = iopf_param;
	group->last_fault.fault = evt->fault;
	INIT_LIST_HEAD(&group->faults);
	INIT_LIST_HEAD(&group->pending_node);
	list_add(&group->last_fault.list, &group->faults);

	/* See if we have partial faults for this group */
	mutex_lock(&iopf_param->lock);
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}
	list_add(&group->pending_node, &iopf_param->faults);
	mutex_unlock(&iopf_param->lock);

	group->fault_count = list_count_nodes(&group->faults);

	return group;
}

static struct iommu_attach_handle *find_fault_handler(struct device *dev,
						      struct iopf_fault *evt)
{
	struct iommu_fault *fault = &evt->fault;
	struct iommu_attach_handle *attach_handle;

	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
		attach_handle = iommu_attach_handle_get(dev->iommu_group,
				fault->prm.pasid, 0);
		if (IS_ERR(attach_handle)) {
			const struct iommu_ops *ops = dev_iommu_ops(dev);

			if (!ops->user_pasid_table)
				return NULL;
			/*
			 * The iommu driver for this device supports user-
			 * managed PASID table. Therefore page faults for
			 * any PASID should go through the NESTING domain
			 * attached to the device RID.
			 */
			attach_handle = iommu_attach_handle_get(
					dev->iommu_group, IOMMU_NO_PASID,
					IOMMU_DOMAIN_NESTED);
			if (IS_ERR(attach_handle))
				return NULL;
		}
	} else {
		attach_handle = iommu_attach_handle_get(dev->iommu_group,
				IOMMU_NO_PASID, 0);

		if (IS_ERR(attach_handle))
			return NULL;
	}

	if (!attach_handle->domain->iopf_handler)
		return NULL;

	return attach_handle;
}

static void iopf_error_response(struct device *dev, struct iopf_fault *evt)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_fault *fault = &evt->fault;
	struct iommu_page_response resp = {
		.pasid = fault->prm.pasid,
		.grpid = fault->prm.grpid,
		.code = IOMMU_PAGE_RESP_INVALID
	};

	ops->page_response(dev, evt, &resp);
}

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded
 * IRQ handler. If this function fails then ops->page_response() was called to
 * complete evt if required.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses,
 * and instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the
 * stop request completes, outstanding faults will have been dealt with by the
 * time the PASID is freed.
 *
 * Any valid page fault will be eventually routed to an iommu domain and the
 * page fault handler installed there will get called. The users of this
 * handling framework should guarantee that the iommu domain could only be
 * freed after the device has stopped generating page faults (or the iommu
 * hardware has been set to block the page faults) and the pending page faults
 * have been flushed. In case no page fault handler is attached or no iopf
 * params are setup, then ops->page_response() is called to complete the evt.
 *
 * Return: 0 on success, or an error in case of a bad/failed iopf setup.
 */
int iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
	struct iommu_attach_handle *attach_handle;
	struct iommu_fault *fault = &evt->fault;
	struct iommu_fault_param *iopf_param;
	struct iopf_group abort_group = {};
	struct iopf_group *group;

	attach_handle = find_fault_handler(dev, evt);
	if (!attach_handle)
		goto err_bad_iopf;

	/*
	 * Something has gone wrong if a fault capable domain is attached but
	 * no iopf_param is setup
	 */
	iopf_param = iopf_get_dev_fault_param(dev);
	if (WARN_ON(!iopf_param))
		goto err_bad_iopf;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		int ret;

		ret = report_partial_fault(iopf_param, fault);
		iopf_put_dev_fault_param(iopf_param);
		/* A request that is not the last does not need to be ack'd */

		return ret;
	}

	/*
	 * This is the last page fault of a group. Allocate an iopf group and
	 * pass it to domain's page fault handler. The group holds a reference
	 * count of the fault parameter. It will be released after response or
	 * error path of this function. If an error is returned, the caller
	 * will send a response to the hardware. We need to clean up before
	 * leaving, otherwise partial faults will be stuck.
	 */
	group = iopf_group_alloc(iopf_param, evt, &abort_group);
	if (group == &abort_group)
		goto err_abort;

	group->attach_handle = attach_handle;

	/*
	 * On success iopf_handler must call iopf_group_response() and
	 * iopf_free_group()
	 */
	if (group->attach_handle->domain->iopf_handler(group))
		goto err_abort;

	return 0;

err_abort:
	dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
			     fault->prm.pasid);
	iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
	if (group == &abort_group)
		__iopf_free_group(group);
	else
		iopf_free_group(group);

	return 0;

err_bad_iopf:
	if (fault->type == IOMMU_FAULT_PAGE_REQ)
		iopf_error_response(dev, evt);

	return -EINVAL;
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
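
/*
 * Illustrative sketch (not built, not part of this file): how an IOMMU driver
 * might turn one hardware page-request entry into an iopf_fault and hand it
 * to iommu_report_device_fault(), typically from its threaded PRQ IRQ
 * handler. struct my_prq_entry, my_get_device() and the cookie are
 * hypothetical driver internals, not existing kernel APIs.
 */
#if 0
struct my_prq_entry {
	u32 pasid;
	u32 grpid;
	u64 addr;
	bool pasid_valid;
	bool last;
	bool write;
};

static void my_iommu_report_prq_entry(void *cookie, struct my_prq_entry *prq)
{
	struct device *dev = my_get_device(cookie, prq);
	struct iopf_fault evt = {
		.fault = {
			.type = IOMMU_FAULT_PAGE_REQ,
			.prm = {
				.pasid = prq->pasid,
				.grpid = prq->grpid,
				.addr = prq->addr,
				.perm = IOMMU_FAULT_PERM_READ |
					(prq->write ? IOMMU_FAULT_PERM_WRITE : 0),
				.flags = (prq->pasid_valid ?
					  IOMMU_FAULT_PAGE_REQUEST_PASID_VALID : 0) |
					 (prq->last ?
					  IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE : 0),
			},
		},
	};

	/* On failure the core has already sent any required page response. */
	iommu_report_device_fault(dev, &evt);
}
#endif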

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	struct iommu_fault_param *iopf_param;

	/*
	 * It's a driver bug to be here after iopf_queue_remove_device().
	 * Therefore, it's safe to dereference the fault parameter without
	 * holding the lock.
	 */
	iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
	if (WARN_ON(!iopf_param))
		return -ENODEV;

	flush_workqueue(iopf_param->queue->wq);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
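
/*
 * Illustrative sketch (not built, not part of this file): the ordering an
 * IOMMU driver might follow before freeing a PASID, per the constraints
 * stated in the kernel-doc above. my_iommu_block_pasid() and
 * my_iommu_drain_prq() are hypothetical driver internals.
 */
#if 0
static void my_iommu_free_pasid(struct device *dev, ioasid_t pasid)
{
	/* Stop new faults: block translation/faulting for this PASID. */
	my_iommu_block_pasid(dev, pasid);

	/* Push pending hardware page requests into the IOPF workqueue. */
	my_iommu_drain_prq(dev);

	/* Wait until the fault workqueue has processed everything queued. */
	iopf_queue_flush_dev(dev);

	/* The PASID can now be reallocated safely. */
}
#endif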

/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
			 enum iommu_page_response_code status)
{
	struct iommu_fault_param *fault_param = group->fault_param;
	struct iopf_fault *iopf = &group->last_fault;
	struct device *dev = group->fault_param->dev;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_page_response resp = {
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	/* Only send response if there is a fault report pending */
	mutex_lock(&fault_param->lock);
	if (!list_empty(&group->pending_node)) {
		ops->page_response(dev, &group->last_fault, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);
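
/*
 * Illustrative sketch (not built, not part of this file): a domain's
 * iopf_handler, which on success must eventually call iopf_group_response()
 * and iopf_free_group(), as required by iommu_report_device_fault(). It
 * defers the actual fault resolution to process context using the group's
 * embedded work item. my_resolve_fault() and my_domain_wq are hypothetical.
 */
#if 0
static struct workqueue_struct *my_domain_wq;

static void my_handle_group_work(struct work_struct *work)
{
	struct iopf_group *group = container_of(work, struct iopf_group, work);
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
	struct iopf_fault *iopf;

	/* Try to resolve every fault in the group; fail the group otherwise. */
	list_for_each_entry(iopf, &group->faults, list) {
		if (my_resolve_fault(group->attach_handle->domain,
				     iopf->fault.prm.addr,
				     iopf->fault.prm.perm)) {
			status = IOMMU_PAGE_RESP_INVALID;
			break;
		}
	}

	iopf_group_response(group, status);
	iopf_free_group(group);
}

static int my_domain_iopf_handler(struct iopf_group *group)
{
	/* Defer to process context; the report path runs in a threaded IRQ. */
	INIT_WORK(&group->work, my_handle_group_work);
	return queue_work(my_domain_wq, &group->work) ? 0 : -EBUSY;
}
#endif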

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, last page faults in a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iommu_fault_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		mutex_lock(&iopf_param->lock);
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
		mutex_unlock(&iopf_param->lock);
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = 0;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (!ops->page_response)
		return -ENODEV;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (rcu_dereference_check(param->fault_param,
				  lockdep_is_held(&param->lock))) {
		ret = -EBUSY;
		goto done_unlock;
	}

	fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
	if (!fault_param) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	mutex_init(&fault_param->lock);
	INIT_LIST_HEAD(&fault_param->faults);
	INIT_LIST_HEAD(&fault_param->partial);
	fault_param->dev = dev;
	refcount_set(&fault_param->users, 1);
	list_add(&fault_param->queue_list, &queue->devices);
	fault_param->queue = queue;

	rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Removing a device from an iopf_queue. It's recommended to follow these
 * steps when removing a device:
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iommu_page_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released after iommu_page_response().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	struct iopf_fault *partial_iopf;
	struct iopf_fault *next;
	struct iopf_group *group, *temp;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	fault_param = rcu_dereference_check(param->fault_param,
					    lockdep_is_held(&param->lock));

	if (WARN_ON(!fault_param || fault_param->queue != queue))
		goto unlock;

	mutex_lock(&fault_param->lock);
	list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
		kfree(partial_iopf);

	list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
		struct iopf_fault *iopf = &group->last_fault;
		struct iommu_page_response resp = {
			.pasid = iopf->fault.prm.pasid,
			.grpid = iopf->fault.prm.grpid,
			.code = IOMMU_PAGE_RESP_INVALID
		};

		ops->page_response(dev, iopf, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);

	list_del(&fault_param->queue_list);

	/* dec the ref owned by iopf_queue_add_device() */
	rcu_assign_pointer(param->fault_param, NULL);
	iopf_put_dev_fault_param(fault_param);
unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iommu_fault_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
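
/*
 * Illustrative sketch (not built, not part of this file): a typical IOPF
 * queue lifecycle in an IOMMU driver, pairing iopf_queue_alloc() and
 * iopf_queue_free() at driver init/exit, and iopf_queue_add_device() and
 * iopf_queue_remove_device() when PRI is enabled/disabled for an endpoint.
 * struct my_iommu and the my_* PRI helpers are hypothetical.
 */
#if 0
struct my_iommu {
	struct device *dev;
	struct iopf_queue *iopf_queue;
};

static int my_iommu_init_iopf(struct my_iommu *iommu)
{
	iommu->iopf_queue = iopf_queue_alloc(dev_name(iommu->dev));
	if (!iommu->iopf_queue)
		return -ENOMEM;
	return 0;
}

static int my_iommu_enable_pri(struct my_iommu *iommu, struct device *dev)
{
	int ret;

	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
	if (ret)
		return ret;

	/* Only turn on PRI once the device can have its faults handled. */
	return my_device_enable_pri(dev);
}

static void my_iommu_disable_pri(struct my_iommu *iommu, struct device *dev)
{
	/* Stop PRI generation and drain the hardware queue first. */
	my_iommu_block_pri(iommu, dev);

	/* Responds to outstanding requests and detaches the device. */
	iopf_queue_remove_device(iommu->iopf_queue, dev);

	my_device_disable_pri(dev);
}

static void my_iommu_exit_iopf(struct my_iommu *iommu)
{
	iopf_queue_free(iommu->iopf_queue);
}
#endif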