// SPDX-License-Identifier: GPL-2.0
/*
 * platform_device probing code for ARM performance counters.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/kconfig.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/percpu.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/smp.h>

static int probe_current_pmu(struct arm_pmu *pmu,
                             const struct pmu_probe_info *info)
{
        int cpu = get_cpu();
        unsigned int cpuid = read_cpuid_id();
        int ret = -ENODEV;

        pr_info("probing PMU on CPU %d\n", cpu);

        for (; info->init != NULL; info++) {
                if ((cpuid & info->mask) != info->cpuid)
                        continue;
                ret = info->init(pmu);
                break;
        }

        put_cpu();
        return ret;
}
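/*
 * A minimal sketch of the probe table probe_current_pmu() expects, using
 * the ARM_PMU_PROBE() helper from <linux/perf/arm_pmu.h>; the entries are
 * illustrative (modelled on the ARMv7 PMU driver), not defined here:
 *
 *	static const struct pmu_probe_info probe_table[] = {
 *		ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
 *		{ },
 *	};
 *
 * The zero-initialised sentinel ends the walk: probe_current_pmu() stops
 * at the first entry whose ->init is NULL, and an entry matches when
 * (cpuid & info->mask) == info->cpuid.
 */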

static int pmu_parse_percpu_irq(struct arm_pmu *pmu, int irq)
{
        int cpu, ret;
        struct pmu_hw_events __percpu *hw_events = pmu->hw_events;

        ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
        if (ret)
                return ret;

        for_each_cpu(cpu, &pmu->supported_cpus)
                per_cpu(hw_events->irq, cpu) = irq;

        return 0;
}

static bool pmu_has_irq_affinity(struct device_node *node)
{
        return !!of_find_property(node, "interrupt-affinity", NULL);
}

static int pmu_parse_irq_affinity(struct device_node *node, int i)
{
        struct device_node *dn;
        int cpu;

        /*
         * If we don't have an interrupt-affinity property, we guess irq
         * affinity matches our logical CPU order, as we used to assume.
         * This is fragile, so we'll warn in pmu_parse_irqs().
         */
        if (!pmu_has_irq_affinity(node))
                return i;

        dn = of_parse_phandle(node, "interrupt-affinity", i);
        if (!dn) {
                pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n",
                        i, node);
                return -EINVAL;
        }

        cpu = of_cpu_node_to_id(dn);
        if (cpu < 0) {
                pr_warn("failed to find logical CPU for %pOFn\n", dn);
                cpu = nr_cpu_ids;
        }

        of_node_put(dn);

        return cpu;
}

static int pmu_parse_irqs(struct arm_pmu *pmu)
{
        int i = 0, num_irqs;
        struct platform_device *pdev = pmu->plat_device;
        struct pmu_hw_events __percpu *hw_events = pmu->hw_events;

        num_irqs = platform_irq_count(pdev);
        if (num_irqs < 0) {
                pr_err("unable to count PMU IRQs\n");
                return num_irqs;
        }

        /*
         * In this case we have no idea which CPUs are covered by the PMU.
         * To match our prior behaviour, we assume all CPUs in this case.
         */
        if (num_irqs == 0) {
                pr_warn("no irqs for PMU, sampling events not supported\n");
                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
                cpumask_setall(&pmu->supported_cpus);
                return 0;
        }
        if (num_irqs == 1) {
                int irq = platform_get_irq(pdev, 0);

                /* platform_get_irq() returns a negative errno on failure */
                if (irq > 0 && irq_is_percpu_devid(irq))
                        return pmu_parse_percpu_irq(pmu, irq);
        }

        if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(pdev->dev.of_node)) {
                pr_warn("no interrupt-affinity property for %pOF, guessing.\n",
                        pdev->dev.of_node);
        }

        for (i = 0; i < num_irqs; i++) {
                int cpu, irq;

                irq = platform_get_irq(pdev, i);
                if (WARN_ON(irq <= 0))
                        continue;

                if (irq_is_percpu_devid(irq)) {
                        pr_warn("multiple PPIs or mismatched SPI/PPI detected\n");
                        return -EINVAL;
                }

                cpu = pmu_parse_irq_affinity(pdev->dev.of_node, i);
                if (cpu < 0)
                        return cpu;
                if (cpu >= nr_cpu_ids)
                        continue;

                if (per_cpu(hw_events->irq, cpu)) {
                        pr_warn("multiple PMU IRQs for the same CPU detected\n");
                        return -EINVAL;
                }

                per_cpu(hw_events->irq, cpu) = irq;
                cpumask_set_cpu(cpu, &pmu->supported_cpus);
        }

        return 0;
}
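/*
 * For reference, a devicetree fragment of the shape pmu_parse_irqs()
 * handles when per-CPU SPIs are used. The node and labels are made up
 * for illustration; the binding itself is the standard ARM PMU one:
 *
 *	pmu {
 *		compatible = "arm,cortex-a15-pmu";
 *		interrupts = <0 68 4>, <0 69 4>;
 *		interrupt-affinity = <&cpu0>, <&cpu1>;
 *	};
 *
 * interrupts[i] is routed to the CPU referenced by interrupt-affinity[i];
 * without the property, entry i is assumed to belong to logical CPU i,
 * which is the fragile guess warned about above. A single PPI instead
 * takes the pmu_parse_percpu_irq() path and covers a whole partition.
 */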

static int armpmu_request_irqs(struct arm_pmu *armpmu)
{
        struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;
        int cpu, err = 0;

        for_each_cpu(cpu, &armpmu->supported_cpus) {
                int irq = per_cpu(hw_events->irq, cpu);

                if (!irq)
                        continue;

                err = armpmu_request_irq(irq, cpu);
                if (err)
                        break;
        }

        return err;
}

static void armpmu_free_irqs(struct arm_pmu *armpmu)
{
        int cpu;
        struct pmu_hw_events __percpu *hw_events = armpmu->hw_events;

        for_each_cpu(cpu, &armpmu->supported_cpus) {
                int irq = per_cpu(hw_events->irq, cpu);

                armpmu_free_irq(irq, cpu);
        }
}

#ifdef CONFIG_AMLOGIC_MODIFY
#include <linux/of_address.h>
#include <linux/delay.h>

struct amlpmu_context amlpmu_ctx;

static void amlpmu_fix_setup_affinity(int irq)
{
        int cluster_index = 0;
        int cpu;
        int affinity_cpu = -1;
        struct amlpmu_cpuinfo *ci = NULL;
        struct amlpmu_context *ctx = &amlpmu_ctx;
        s64 latest_next_stamp = S64_MAX;

        if (irq == ctx->irqs[0]) {
                cluster_index = 0;
        } else if (ctx->clusterb_enabled && irq == ctx->irqs[1]) {
                cluster_index = 1;
        } else {
                pr_err("%s() bad irq = %d\n", __func__, irq);
                return;
        }

        /*
         * Pick the CPU in this cluster whose predicted next overflow
         * comes soonest and make it the IRQ affinity target
         * (latest_next_stamp tracks the minimum, i.e. the soonest
         * prediction); if no CPU has a prediction, fall back to the
         * first CPU of the cpumask.
         *
         * TODO:
         * - if a CPU's prediction fails N times in a row, apply some
         *   punishment to its score;
         * - if no CPU has a prediction, prefer the most recently used
         *   CPU for affinity;
         * - keep measuring and improving prediction accuracy.
         */
        for_each_cpu_and(cpu,
                         &ctx->cpumasks[cluster_index],
                         cpu_possible_mask) {
                ci = per_cpu_ptr(ctx->cpuinfo, cpu);
                if (ci->next_predicted_stamp &&
                    ci->next_predicted_stamp < latest_next_stamp) {
                        latest_next_stamp = ci->next_predicted_stamp;
                        affinity_cpu = cpu;
                }
        }

        if (affinity_cpu == -1) {
                affinity_cpu = cpumask_first(&ctx->cpumasks[cluster_index]);
                pr_debug("used first cpu: %d, cluster: 0x%lx\n",
                         affinity_cpu,
                         *cpumask_bits(&ctx->cpumasks[cluster_index]));
        } else {
                pr_debug("find affinity cpu: %d, next_predicted_stamp: %lld\n",
                         affinity_cpu,
                         latest_next_stamp);
        }

        if (irq_set_affinity(irq, cpumask_of(affinity_cpu)))
                pr_err("irq_set_affinity() failed irq: %d, affinity_cpu: %d\n",
                       irq,
                       affinity_cpu);
}

/*
 * On the CPU that raised the PMU interrupt, @irq_num is the real IRQ
 * number; on the other CPUs (reached via AML_PMU_IPI), @irq_num is -1.
 */
static void amlpmu_irq_fix(int irq_num)
{
        int cpu;
        int cur_cpu;
        int pmuirq_val;
        int cluster_index = 0;
        struct amlpmu_context *ctx = &amlpmu_ctx;

        cur_cpu = smp_processor_id();

        if (irq_num == ctx->irqs[0]) {
                cluster_index = 0;
        } else if (ctx->clusterb_enabled && irq_num == ctx->irqs[1]) {
                cluster_index = 1;
        } else {
                pr_err("%s() bad irq = %d\n", __func__, irq_num);
                return;
        }

        if (!cpumask_test_cpu(cur_cpu, &ctx->cpumasks[cluster_index])) {
                pr_warn("%s() cur_cpu %d not in cluster: 0x%lx\n",
                        __func__,
                        cur_cpu,
                        *cpumask_bits(&ctx->cpumasks[cluster_index]));
        }

        pmuirq_val = readl(ctx->regs[cluster_index]);
        pmuirq_val &= 0xf;
        pmuirq_val <<= ctx->first_cpus[cluster_index];

        pr_debug("%s() val=0x%0x, first_cpu=%d, cluster=0x%lx\n",
                 __func__,
                 readl(ctx->regs[cluster_index]),
                 ctx->first_cpus[cluster_index],
                 *cpumask_bits(&ctx->cpumasks[cluster_index]));

        /*
         * If pmuirq_val is zero we cannot tell which CPU raised the
         * interrupt from the register (e.g. the gxm cluster B has no
         * such status), so pick another online CPU in the cluster (the
         * next one after us) and let it retry handling this IRQ.
         */
        if (!pmuirq_val) {
                int next_cpu = -1;

                for_each_cpu_and(cpu,
                                 &ctx->cpumasks[cluster_index],
                                 cpu_online_mask) {
                        if (cpu > cur_cpu) {
                                next_cpu = cpu;
                                break;
                        }
                }

                if (next_cpu == -1) {
                        for_each_cpu_and(cpu,
                                         &ctx->cpumasks[cluster_index],
                                         cpu_online_mask) {
                                if (cpu < cur_cpu) {
                                        next_cpu = cpu;
                                        break;
                                }
                        }
                }

                if (next_cpu != -1) {
                        /* use next_cpu, not the stale loop iterator */
                        if (irq_set_affinity(irq_num, cpumask_of(next_cpu)))
                                pr_err("irq_set_affin failed, irq=%d cpu=%d\n",
                                       irq_num,
                                       next_cpu);
                } else {
                        pr_err("can't find next cpu\n");
                }

                return;
        }

        /* fix irq from register info */
        for_each_cpu_and(cpu,
                         &ctx->cpumasks[cluster_index],
                         cpu_online_mask) {
                if (!(pmuirq_val & 1 << cpu))
                        continue;

                if (cpu == cur_cpu)
                        continue;

                pr_debug("fix pmu irq cpu=%d, pmuirq=0x%x\n", cpu, pmuirq_val);

                if (irq_set_affinity(irq_num, cpumask_of(cpu)))
                        pr_err("irq_set_affinity() failed, irq=%d cpu=%d\n",
                               irq_num,
                               cpu);

                return;
        }
}
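/*
 * A worked example of the decode above, with assumed values: if the
 * PMUIRQ status register reads 0x2 (bit 1 set) and first_cpus[] for the
 * cluster is 4, the shifted mask becomes 0x20, naming logical CPU 5 as
 * the overflowing CPU, so the IRQ affinity is moved there:
 *
 *	pmuirq_val = 0x2 & 0xf;		(register value is illustrative)
 *	pmuirq_val <<= 4;		(0x2 << 4 == 0x20)
 *	(0x20 & (1 << 5)) != 0		(so cpu 5 is selected)
 */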

static void amlpmu_update_stats(int irq_num, int has_overflowed)
{
        int freq;
        int i;
        ktime_t stamp;
        unsigned long time = jiffies;
        struct amlpmu_cpuinfo *ci;
        struct amlpmu_context *ctx = &amlpmu_ctx;

        ci = this_cpu_ptr(ctx->cpuinfo);

        if (has_overflowed) {
                ci->valid_irq_cnt++;
                ci->valid_irq_time = time;

                stamp = ktime_get();
                ci->stamp_deltas[ci->valid_irq_cnt % MAX_DELTA_CNT] =
                        stamp - ci->last_stamp;
                ci->last_stamp = stamp;

                /*
                 * Average the ring of recent deltas; treat the average
                 * as valid only if every sample lies within -50%/+50%
                 * of it, otherwise reset it to zero (no prediction).
                 */
                ci->avg_delta = 0;
                for (i = 0; i < MAX_DELTA_CNT; i++)
                        ci->avg_delta += ci->stamp_deltas[i];

                ci->avg_delta /= MAX_DELTA_CNT;
                for (i = 0; i < MAX_DELTA_CNT; i++) {
                        if ((ci->stamp_deltas[i] > ci->avg_delta * 3 / 2) ||
                            (ci->stamp_deltas[i] < ci->avg_delta / 2)) {
                                ci->avg_delta = 0;
                                break;
                        }
                }

                if (ci->avg_delta)
                        ci->next_predicted_stamp =
                                ci->last_stamp + ci->avg_delta;
                else
                        ci->next_predicted_stamp = 0;

                pr_debug("irq_num = %d, valid_irq_cnt = %lu\n",
                         irq_num,
                         ci->valid_irq_cnt);
                pr_debug("cur_delta = %lld, avg_delta = %lld, next = %lld\n",
                         ci->stamp_deltas[ci->valid_irq_cnt % MAX_DELTA_CNT],
                         ci->avg_delta,
                         ci->next_predicted_stamp);
        }

        if (time_after(ci->valid_irq_time, ci->last_valid_irq_time + 2 * HZ)) {
                freq = ci->valid_irq_cnt - ci->last_valid_irq_cnt;
                freq *= HZ;
                freq /= (ci->valid_irq_time - ci->last_valid_irq_time);
                pr_info("######## valid_irq_cnt: %lu - %lu = %lu, freq = %d\n",
                        ci->valid_irq_cnt,
                        ci->last_valid_irq_cnt,
                        ci->valid_irq_cnt - ci->last_valid_irq_cnt,
                        freq);

                ci->last_valid_irq_cnt = ci->valid_irq_cnt;
                ci->last_valid_irq_time = ci->valid_irq_time;
        }
}
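/*
 * A numeric sketch of the prediction, assuming MAX_DELTA_CNT == 4 and
 * millisecond-scale samples (all values invented for illustration):
 * with deltas of {10ms, 10ms, 11ms, 9ms} the average is 10ms and every
 * sample lies within [5ms, 15ms], so next_predicted_stamp becomes
 * last_stamp + 10ms. With {10ms, 10ms, 40ms, 10ms} the average is
 * 17.5ms, the 40ms sample exceeds avg * 3/2 = 26.25ms, and the
 * prediction is dropped until the ring settles again.
 */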

void amlpmu_handle_irq(struct arm_pmu *cpu_pmu, int irq_num, int has_overflowed)
{
        int cpu;
        struct amlpmu_cpuinfo *ci;
        struct amlpmu_context *ctx = &amlpmu_ctx;

        ci = this_cpu_ptr(ctx->cpuinfo);
        ci->irq_num = irq_num;
        cpu = smp_processor_id();

        pr_debug("%s() irq_num = %d, overflowed = %d\n",
                 __func__,
                 irq_num,
                 has_overflowed);

        /*
         * If the current CPU has not overflowed, the interrupt was
         * probably meant for some other CPU. If we are the CPU the
         * interrupt was actually delivered to (irq_num != -1), call
         * amlpmu_irq_fix() to try to re-route it.
         */
        if (!has_overflowed)
                amlpmu_irq_fix(irq_num);

        /*
         * Update the valid-IRQ statistics and the average delta used
         * to predict the time of the next interrupt.
         */
        amlpmu_update_stats(irq_num, has_overflowed);

        if (has_overflowed)
                amlpmu_fix_setup_affinity(irq_num);
}
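/*
 * A sketch of the intended call site, assuming the architecture's PMU
 * overflow handler is patched under CONFIG_AMLOGIC_MODIFY (everything
 * here other than amlpmu_handle_irq() itself is illustrative):
 *
 *	static irqreturn_t armv8pmu_handle_irq(int irq, void *dev)
 *	{
 *		u32 pmovsr = armv8pmu_getreset_flags();
 *
 *		amlpmu_handle_irq(cpu_pmu, irq,
 *				  armv8pmu_has_overflowed(pmovsr));
 *		...
 *	}
 *
 * Secondary CPUs poked to retry the interrupt would pass irq_num == -1,
 * matching the contract documented on amlpmu_irq_fix().
 */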

static int amlpmu_init(struct platform_device *pdev, struct arm_pmu *pmu)
{
        int cpu;
        int ret = 0;
        int irq;
        u32 cpumasks[MAX_CLUSTER_NR] = {0};
        struct amlpmu_context *ctx = &amlpmu_ctx;
        struct amlpmu_cpuinfo *ci;
        int cluster_nr = 1;

        memset(ctx, 0, sizeof(*ctx));

        /* each cpu has its own private pmu interrupt */
        if (of_property_read_bool(pdev->dev.of_node, "private-interrupts")) {
                ctx->private_interrupts = 1;
                return 0;
        }

        ctx->cpuinfo = __alloc_percpu_gfp(sizeof(struct amlpmu_cpuinfo),
                                          SMP_CACHE_BYTES,
                                          GFP_KERNEL | __GFP_ZERO);
        if (!ctx->cpuinfo) {
                pr_err("alloc percpu failed\n");
                ret = -ENOMEM;
                goto free;
        }

        for_each_possible_cpu(cpu) {
                ci = per_cpu_ptr(ctx->cpuinfo, cpu);
                ci->last_valid_irq_time = INITIAL_JIFFIES;
                ci->last_fix_irq_time = INITIAL_JIFFIES;
                ci->last_empty_irq_time = INITIAL_JIFFIES;
        }

        ctx->pmu = pmu;

        if (of_property_read_bool(pdev->dev.of_node, "clusterb-enabled")) {
                ctx->clusterb_enabled = 1;
                cluster_nr = MAX_CLUSTER_NR;
        }

        pr_info("clusterb_enabled = %d\n", ctx->clusterb_enabled);

        ret = of_property_read_u32_array(pdev->dev.of_node,
                                         "cpumasks",
                                         cpumasks,
                                         cluster_nr);
        if (ret) {
                pr_err("read prop cpumasks failed, ret = %d\n", ret);
                ret = -EINVAL;
                goto free;
        }
        pr_info("cpumasks 0x%0x, 0x%0x\n", cpumasks[0], cpumasks[1]);

        irq = platform_get_irq(pdev, 0);
        if (irq < 0) {
                pr_err("get clusterA irq failed, %d\n", irq);
                ret = -EINVAL;
                goto free;
        }
        ctx->irqs[0] = irq;
        pr_info("cluster A irq = %d\n", irq);

        ctx->regs[0] = of_iomap(pdev->dev.of_node, 0);
        if (!ctx->regs[0]) {
                /* of_iomap() returns NULL on failure, not an ERR_PTR */
                pr_err("of_iomap() clusterA failed\n");
                ret = -ENOMEM;
                goto free;
        }

        cpumask_clear(&ctx->cpumasks[0]);
        memcpy(cpumask_bits(&ctx->cpumasks[0]),
               &cpumasks[0],
               sizeof(cpumasks[0]));
        if (!cpumask_intersects(&ctx->cpumasks[0], cpu_possible_mask)) {
                pr_err("bad cpumasks[0] 0x%x\n", cpumasks[0]);
                ret = -EINVAL;
                goto free;
        }
        ctx->first_cpus[0] = cpumask_first(&ctx->cpumasks[0]);

        for_each_cpu(cpu, &ctx->cpumasks[0]) {
                cpumask_set_cpu(cpu, &pmu->supported_cpus);
        }

        amlpmu_fix_setup_affinity(ctx->irqs[0]);

        if (!ctx->clusterb_enabled)
                return 0;

        irq = platform_get_irq(pdev, 1);
        if (irq < 0) {
                pr_err("get clusterB irq failed, %d\n", irq);
                ret = -EINVAL;
                goto free;
        }
        ctx->irqs[1] = irq;
        pr_info("cluster B irq = %d\n", irq);

        ctx->regs[1] = of_iomap(pdev->dev.of_node, 1);
        if (!ctx->regs[1]) {
                /* of_iomap() returns NULL on failure, not an ERR_PTR */
                pr_err("of_iomap() clusterB failed\n");
                ret = -ENOMEM;
                goto free;
        }

        cpumask_clear(&ctx->cpumasks[1]);
        memcpy(cpumask_bits(&ctx->cpumasks[1]),
               &cpumasks[1],
               sizeof(cpumasks[1]));
        if (!cpumask_intersects(&ctx->cpumasks[1], cpu_possible_mask)) {
                pr_err("bad cpumasks[1] 0x%x\n", cpumasks[1]);
                ret = -EINVAL;
                goto free;
        } else if (cpumask_intersects(&ctx->cpumasks[0], &ctx->cpumasks[1])) {
                pr_err("cpumasks intersect 0x%x : 0x%x\n",
                       cpumasks[0],
                       cpumasks[1]);
                ret = -EINVAL;
                goto free;
        }
        ctx->first_cpus[1] = cpumask_first(&ctx->cpumasks[1]);

        for_each_cpu(cpu, &ctx->cpumasks[1]) {
                cpumask_set_cpu(cpu, &pmu->supported_cpus);
        }

        amlpmu_fix_setup_affinity(ctx->irqs[1]);

        return 0;

free:
        if (ctx->cpuinfo)
                free_percpu(ctx->cpuinfo);

        if (ctx->regs[0])
                iounmap(ctx->regs[0]);

        if (ctx->regs[1])
                iounmap(ctx->regs[1]);

        return ret;
}
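/*
 * An illustrative devicetree fragment for the glue above; register
 * addresses, interrupt numbers, and masks are invented, and only the
 * property names ("cpumasks", "clusterb-enabled", "private-interrupts")
 * and the reg/interrupts ordering (cluster A first, then B) come from
 * the code:
 *
 *	arm_pmu {
 *		compatible = "arm,armv8-pmuv3";
 *		reg = <0x0 0xff634680 0x0 0x4>,
 *		      <0x0 0xff6347c0 0x0 0x4>;
 *		interrupts = <0 137 4>, <0 138 4>;
 *		cpumasks = <0x0f 0x30>;
 *		clusterb-enabled;
 *	};
 *
 * Each "cpumasks" cell is a bitmap of the logical CPUs in one cluster;
 * it is copied into the low 32 bits of ctx->cpumasks[] above.
 */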

#endif

int arm_pmu_device_probe(struct platform_device *pdev,
                         const struct of_device_id *of_table,
                         const struct pmu_probe_info *probe_table)
{
        const struct of_device_id *of_id;
        armpmu_init_fn init_fn;
        struct device_node *node = pdev->dev.of_node;
        struct arm_pmu *pmu;
        int ret = -ENODEV;

        pmu = armpmu_alloc();
        if (!pmu)
                return -ENOMEM;

        pmu->plat_device = pdev;

        ret = pmu_parse_irqs(pmu);
        if (ret)
                goto out_free;

        if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
                init_fn = of_id->data;

                pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
                                                           "secure-reg-access");

                /* arm64 systems boot only as non-secure */
                if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
                        pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
                        pmu->secure_access = false;
                }

                ret = init_fn(pmu);
        } else if (probe_table) {
                cpumask_setall(&pmu->supported_cpus);
                ret = probe_current_pmu(pmu, probe_table);
        }

        if (ret) {
                pr_info("%pOF: failed to probe PMU!\n", node);
                goto out_free;
        }

        ret = armpmu_request_irqs(pmu);
        if (ret)
                goto out_free_irqs;

        ret = armpmu_register(pmu);
        if (ret)
                goto out_free_irqs;

#ifdef CONFIG_AMLOGIC_MODIFY
        /* propagate the real errno rather than returning a bare 1 */
        ret = amlpmu_init(pdev, pmu);
        if (ret) {
                pr_err("amlpmu_init() failed, ret = %d\n", ret);
                return ret;
        }
#endif

        return 0;

out_free_irqs:
        armpmu_free_irqs(pmu);
out_free:
        pr_info("%pOF: failed to register PMU devices!\n", node);
        armpmu_free(pmu);
        return ret;
}
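/*
 * Typical use from a PMU driver's platform probe, modelled on the
 * in-tree ARMv8 driver (the identifiers here are illustrative of that
 * caller, not defined in this file):
 *
 *	static int armv8_pmu_device_probe(struct platform_device *pdev)
 *	{
 *		return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
 *					    NULL);
 *	}
 *
 * DT platforms pass an of_device_id table mapping compatible strings to
 * init functions; legacy non-DT ARM platforms pass a pmu_probe_info
 * table instead and take the probe_current_pmu() path.
 */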