// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
 * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/cleanup.h>
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/interconnect.h>
#include <linux/iopoll.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/ratelimit.h>
#include <linux/spinlock.h>

#include "arm-smmu.h"
#include "arm-smmu-qcom.h"

#define TBU_DBG_TIMEOUT_US		100
#define DEBUG_AXUSER_REG		0x30
#define DEBUG_AXUSER_CDMID		GENMASK_ULL(43, 36)
#define DEBUG_AXUSER_CDMID_VAL		0xff
#define DEBUG_PAR_REG			0x28
#define DEBUG_PAR_FAULT_VAL		BIT(0)
#define DEBUG_PAR_PA			GENMASK_ULL(47, 12)
#define DEBUG_SID_HALT_REG		0x0
#define DEBUG_SID_HALT_VAL		BIT(16)
#define DEBUG_SID_HALT_SID		GENMASK(9, 0)
#define DEBUG_SR_HALT_ACK_REG		0x20
#define DEBUG_SR_HALT_ACK_VAL		BIT(1)
#define DEBUG_SR_ECATS_RUNNING_VAL	BIT(0)
#define DEBUG_TXN_AXCACHE		GENMASK(5, 2)
#define DEBUG_TXN_AXPROT		GENMASK(8, 6)
#define DEBUG_TXN_AXPROT_PRIV		0x1
#define DEBUG_TXN_AXPROT_NSEC		0x2
#define DEBUG_TXN_TRIGG_REG		0x18
#define DEBUG_TXN_TRIGGER		BIT(0)
#define DEBUG_VA_ADDR_REG		0x8

static LIST_HEAD(tbu_list);
static DEFINE_MUTEX(tbu_list_lock);
static DEFINE_SPINLOCK(atos_lock);

struct qcom_tbu {
	struct device *dev;
	struct device_node *smmu_np;
	u32 sid_range[2];
	struct list_head list;
	struct clk *clk;
	struct icc_path *path;
	void __iomem *base;
	spinlock_t halt_lock; /* multiple halt or resume can't execute concurrently */
	int halt_count;
};

static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct qcom_smmu, smmu);
}

void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
{
	int ret;
	u32 tbu_pwr_status, sync_inv_ack, sync_inv_progress;
	struct qcom_smmu *qsmmu = container_of(smmu, struct qcom_smmu, smmu);
	const struct qcom_smmu_config *cfg;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (__ratelimit(&rs)) {
		dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");

		cfg = qsmmu->cfg;
		if (!cfg)
			return;

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_TBU_PWR_STATUS],
					&tbu_pwr_status);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU power status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK],
					&sync_inv_ack);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TBU sync/inv ack status: %d\n", ret);

		ret = qcom_scm_io_readl(smmu->ioaddr + cfg->reg_offset[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR],
					&sync_inv_progress);
		if (ret)
			dev_err(smmu->dev,
				"Failed to read TCU sync/inv progress: %d\n", ret);

		dev_err(smmu->dev,
			"TBU: power_status %#x sync_inv_ack %#x sync_inv_progress %#x\n",
			tbu_pwr_status, sync_inv_ack, sync_inv_progress);
	}
}

static struct qcom_tbu *qcom_find_tbu(struct qcom_smmu *qsmmu, u32 sid)
{
	struct qcom_tbu *tbu;
	u32 start, end;

	guard(mutex)(&tbu_list_lock);

	if (list_empty(&tbu_list))
		return NULL;

	list_for_each_entry(tbu, &tbu_list, list) {
		start = tbu->sid_range[0];
		end = start + tbu->sid_range[1];

		if (qsmmu->smmu.dev->of_node == tbu->smmu_np &&
		    start <= sid && sid < end)
			return tbu;
	}
	dev_err(qsmmu->smmu.dev, "Unable to find TBU for sid 0x%x\n", sid);

	return NULL;
}

static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int ret = 0, idx = smmu_domain->cfg.cbndx;
	u32 val, fsr, status;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (tbu->halt_count) {
		tbu->halt_count++;
		return ret;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val |= DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
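
	/*
	 * Check whether a context fault is already stalled; if so it has to be
	 * dealt with before the halt request above can be acknowledged.
	 */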
	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) {
		u32 sctlr_orig, sctlr;

		/*
		 * We are in a fault. Our request to halt the bus will not
		 * complete until transactions in front of us (such as the fault
		 * itself) have completed. Disable iommu faults and terminate
		 * any existing transactions.
		 */
		sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
		sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}

	if (readl_poll_timeout_atomic(tbu->base + DEBUG_SR_HALT_ACK_REG, status,
				      (status & DEBUG_SR_HALT_ACK_VAL),
				      0, TBU_DBG_TIMEOUT_US)) {
		dev_err(tbu->dev, "Timeout while trying to halt TBU!\n");
		ret = -ETIMEDOUT;

		val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
		val &= ~DEBUG_SID_HALT_VAL;
		writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

		return ret;
	}

	tbu->halt_count = 1;

	return ret;
}

static void qcom_tbu_resume(struct qcom_tbu *tbu)
{
	u32 val;

	guard(spinlock_irqsave)(&tbu->halt_lock);
	if (!tbu->halt_count) {
		WARN(1, "%s: halt_count is 0", dev_name(tbu->dev));
		return;
	}

	if (tbu->halt_count > 1) {
		tbu->halt_count--;
		return;
	}

	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_VAL;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	tbu->halt_count = 0;
}

static phys_addr_t qcom_tbu_trigger_atos(struct arm_smmu_domain *smmu_domain,
					 struct qcom_tbu *tbu, dma_addr_t iova, u32 sid)
{
	bool atos_timedout = false;
	phys_addr_t phys = 0;
	ktime_t timeout;
	u64 val;

	/* Set address and stream-id */
	val = readq_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	val |= FIELD_PREP(DEBUG_SID_HALT_SID, sid);
	writeq_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);
	writeq_relaxed(iova, tbu->base + DEBUG_VA_ADDR_REG);
	val = FIELD_PREP(DEBUG_AXUSER_CDMID, DEBUG_AXUSER_CDMID_VAL);
	writeq_relaxed(val, tbu->base + DEBUG_AXUSER_REG);

	/* Write-back read and write-allocate */
	val = FIELD_PREP(DEBUG_TXN_AXCACHE, 0xf);

	/* Non-secure access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_NSEC);

	/* Privileged access */
	val |= FIELD_PREP(DEBUG_TXN_AXPROT, DEBUG_TXN_AXPROT_PRIV);

	val |= DEBUG_TXN_TRIGGER;
	writeq_relaxed(val, tbu->base + DEBUG_TXN_TRIGG_REG);

	timeout = ktime_add_us(ktime_get(), TBU_DBG_TIMEOUT_US);
	for (;;) {
		val = readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);
		if (!(val & DEBUG_SR_ECATS_RUNNING_VAL))
			break;
		val = readl_relaxed(tbu->base + DEBUG_PAR_REG);
		if (val & DEBUG_PAR_FAULT_VAL)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0) {
			atos_timedout = true;
			break;
		}
	}
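
	/*
	 * A set fault bit in the debug PAR register means the debug translation
	 * itself faulted; otherwise the translated physical address is reported
	 * in the DEBUG_PAR_PA field.
	 */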
	val = readq_relaxed(tbu->base + DEBUG_PAR_REG);
	if (val & DEBUG_PAR_FAULT_VAL)
		dev_err(tbu->dev, "ATOS generated a fault interrupt! PAR = %llx, SID=0x%x\n",
			val, sid);
	else if (atos_timedout)
		dev_err_ratelimited(tbu->dev, "ATOS translation timed out!\n");
	else
		phys = FIELD_GET(DEBUG_PAR_PA, val);

	/* Reset hardware */
	writeq_relaxed(0, tbu->base + DEBUG_TXN_TRIGG_REG);
	writeq_relaxed(0, tbu->base + DEBUG_VA_ADDR_REG);
	val = readl_relaxed(tbu->base + DEBUG_SID_HALT_REG);
	val &= ~DEBUG_SID_HALT_SID;
	writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG);

	return phys;
}

static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain,
				     dma_addr_t iova, u32 sid)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
	int idx = smmu_domain->cfg.cbndx;
	struct qcom_tbu *tbu;
	u32 sctlr_orig, sctlr;
	phys_addr_t phys = 0;
	int attempt = 0;
	int ret;
	u64 fsr;

	tbu = qcom_find_tbu(qsmmu, sid);
	if (!tbu)
		return 0;

	ret = icc_set_bw(tbu->path, 0, UINT_MAX);
	if (ret)
		return ret;

	ret = clk_prepare_enable(tbu->clk);
	if (ret)
		goto disable_icc;

	ret = qcom_tbu_halt(tbu, smmu_domain);
	if (ret)
		goto disable_clk;

	/*
	 * ATOS/ECATS can trigger the fault interrupt, so disable it temporarily
	 * and check for an interrupt manually.
	 */
	sctlr_orig = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_SCTLR);
	sctlr = sctlr_orig & ~(ARM_SMMU_SCTLR_CFCFG | ARM_SMMU_SCTLR_CFIE);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr);

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (fsr & ARM_SMMU_CB_FSR_FAULT) {
		/* Clear pending interrupts */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

		/*
		 * TBU halt takes care of resuming any stalled transaction.
		 * Kept here for completeness' sake.
		 */
		if (fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
					  ARM_SMMU_RESUME_TERMINATE);
	}

	/* Only one concurrent atos operation */
	scoped_guard(spinlock_irqsave, &atos_lock) {
		/*
		 * If the translation fails, attempt the lookup one more time.
		 */
		do {
			phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid);

			fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
			if (fsr & ARM_SMMU_CB_FSR_FAULT) {
				/* Clear pending interrupts */
				arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);

				if (fsr & ARM_SMMU_CB_FSR_SS)
					arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME,
							  ARM_SMMU_RESUME_TERMINATE);
			}
		} while (!phys && attempt++ < 2);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr_orig);
	}

	qcom_tbu_resume(tbu);

	/* Read to complete prior write transactions */
	readl_relaxed(tbu->base + DEBUG_SR_HALT_ACK_REG);

disable_clk:
	clk_disable_unprepare(tbu->clk);
disable_icc:
	icc_set_bw(tbu->path, 0, 0);

	return phys;
}

static phys_addr_t qcom_smmu_iova_to_phys_hard(struct arm_smmu_domain *smmu_domain,
					       dma_addr_t iova)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;
	u32 frsynra;
	u16 sid;

	frsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
	sid = FIELD_GET(ARM_SMMU_CBFRSYNRA_SID, frsynra);

	return qcom_iova_to_phys(smmu_domain, iova, sid);
}

static phys_addr_t qcom_smmu_verify_fault(struct arm_smmu_domain *smmu_domain,
					  dma_addr_t iova, u32 fsr)
{
	struct io_pgtable *iop = io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	phys_addr_t phys_post_tlbiall;
	phys_addr_t phys;

	phys = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);
	io_pgtable_tlb_flush_all(iop);
	phys_post_tlbiall = qcom_smmu_iova_to_phys_hard(smmu_domain, iova);

	if (phys != phys_post_tlbiall) {
		dev_err(smmu->dev,
			"ATOS results differed across TLBIALL... (before: %pa after: %pa)\n",
			&phys, &phys_post_tlbiall);
	}

	return (phys == 0 ? phys_post_tlbiall : phys);
}
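
/*
 * Context fault handler for Qualcomm SMMUs with TBU debug support: on top of
 * the generic report/clear/resume handling, it cross-checks the software page
 * table walk against a hardware (ATOS/ECATS) translation to help diagnose
 * faults that no client fault handler claims.
 */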
irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
{
	struct arm_smmu_domain *smmu_domain = dev;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_context_fault_info cfi;
	u32 resume = 0;
	int idx = smmu_domain->cfg.cbndx;
	phys_addr_t phys_soft;
	int ret, tmp;
	static DEFINE_RATELIMIT_STATE(_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	arm_smmu_read_context_fault_info(smmu, idx, &cfi);

	if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT))
		return IRQ_NONE;

	if (list_empty(&tbu_list)) {
		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

		if (ret == -ENOSYS)
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);
		return IRQ_HANDLED;
	}

	phys_soft = ops->iova_to_phys(ops, cfi.iova);

	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
	if (!tmp || tmp == -EBUSY) {
		ret = IRQ_HANDLED;
		resume = ARM_SMMU_RESUME_TERMINATE;
	} else {
		phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr);

		if (__ratelimit(&_rs)) {
			arm_smmu_print_context_fault_info(smmu, idx, &cfi);

			dev_err(smmu->dev,
				"soft iova-to-phys=%pa\n", &phys_soft);
			if (!phys_soft)
				dev_err(smmu->dev,
					"SOFTWARE TABLE WALK FAILED! Looks like %s accessed an unmapped address!\n",
					dev_name(smmu->dev));
			if (phys_atos)
				dev_err(smmu->dev, "hard iova-to-phys (ATOS)=%pa\n",
					&phys_atos);
			else
				dev_err(smmu->dev, "hard iova-to-phys (ATOS) failed\n");
		}

		ret = IRQ_NONE;
		resume = ARM_SMMU_RESUME_TERMINATE;
	}

	/*
	 * If the client returns -EBUSY, do not clear FSR and do not RESUME
	 * if stalled. This is required to keep the IOMMU client stalled on
	 * the outstanding fault. This gives the client a chance to take any
	 * debug action and then terminate the stalled transaction.
	 * So, the sequence in case of stall on fault should be:
	 * 1) Do not clear FSR or write to RESUME here
	 * 2) Client takes any debug action
	 * 3) Client terminates the stalled transaction and resumes the IOMMU
	 * 4) Client clears FSR. The FSR should only be cleared after 3) and
	 *    not before so that the fault remains outstanding. This ensures
	 *    SCTLR.HUPCF has the desired effect if subsequent transactions also
	 *    need to be terminated.
	 */
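
	/* The client did not return -EBUSY: ack the fault and terminate any stalled transaction now. */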
	if (tmp != -EBUSY) {
		/* Clear the faulting FSR */
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr);

		/* Retry or terminate any stalled transactions */
		if (cfi.fsr & ARM_SMMU_CB_FSR_SS)
			arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume);
	}

	return ret;
}

int qcom_tbu_probe(struct platform_device *pdev)
{
	struct of_phandle_args args = { .args_count = 2 };
	struct device_node *np = pdev->dev.of_node;
	struct device *dev = &pdev->dev;
	struct qcom_tbu *tbu;

	tbu = devm_kzalloc(dev, sizeof(*tbu), GFP_KERNEL);
	if (!tbu)
		return -ENOMEM;

	tbu->dev = dev;
	INIT_LIST_HEAD(&tbu->list);
	spin_lock_init(&tbu->halt_lock);

	if (of_parse_phandle_with_args(np, "qcom,stream-id-range", "#iommu-cells", 0, &args)) {
		dev_err(dev, "Cannot parse the 'qcom,stream-id-range' DT property\n");
		return -EINVAL;
	}

	tbu->smmu_np = args.np;
	tbu->sid_range[0] = args.args[0];
	tbu->sid_range[1] = args.args[1];
	of_node_put(args.np);

	tbu->base = devm_of_iomap(dev, np, 0, NULL);
	if (IS_ERR(tbu->base))
		return PTR_ERR(tbu->base);

	tbu->clk = devm_clk_get_optional(dev, NULL);
	if (IS_ERR(tbu->clk))
		return PTR_ERR(tbu->clk);

	tbu->path = devm_of_icc_get(dev, NULL);
	if (IS_ERR(tbu->path))
		return PTR_ERR(tbu->path);

	guard(mutex)(&tbu_list_lock);
	list_add_tail(&tbu->list, &tbu_list);

	return 0;
}
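
/*
 * Illustrative sketch only (not part of this driver): a TBU device node shaped
 * the way qcom_tbu_probe() above expects, with "qcom,stream-id-range" pointing
 * at the SMMU node plus a <SID base, SID count> pair. The compatible string,
 * unit address, clock and interconnect specifiers below are placeholders and
 * depend on the platform binding.
 *
 *	tbu@dd9000 {
 *		compatible = "qcom,<soc>-tbu";
 *		reg = <0xdd9000 0x1000>;
 *		clocks = <&gcc SOC_SPECIFIC_TBU_CLK>;
 *		interconnects = <&noc MASTER_X 0 &noc SLAVE_Y 0>;
 *		qcom,stream-id-range = <&apps_smmu 0x1c00 0x400>;
 *	};
 */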