@syldrathecat
Created August 7, 2020 11:24
zen linux scheduler hacks
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 883da0abf779..c845d3f7dc95 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1012,8 +1012,7 @@ config SCHED_MC
config SCHED_MC_PRIO
bool "CPU core priorities scheduler support"
- depends on SCHED_MC && CPU_SUP_INTEL
- select X86_INTEL_PSTATE
+ depends on SCHED_MC
select CPU_FREQ
default y
help
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 7a99b19bb893..a2d0e27b6864 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -56,7 +56,7 @@ struct cppc_pcc_data {
/*
* Lock to provide controlled access to the PCC channel.
*
- * For performance critical usecases(currently cppc_set_perf)
+ * For performance-critical use cases (currently cppc_set_reg)
* We need to take read_lock and check if channel belongs to OSPM
* before reading or writing to PCC subspace
* We need to take write_lock before transferring the channel
@@ -111,6 +111,14 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
#define CPC_SUPPORTED(cpc) ((cpc)->type == ACPI_TYPE_INTEGER ? \
!!(cpc)->cpc_entry.int_value : \
!IS_NULL_REG(&(cpc)->cpc_entry.reg))
+
+/*
+ * Evaluates to True if an optional cpc field is supported and is
+ * BUFFER only
+ */
+#define CPC_SUP_BUFFER_ONLY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
+ !IS_NULL_REG(&(cpc)->cpc_entry.reg))
+
/*
* Arbitrary Retries in case the remote processor is slow to respond
* to PCC commands. Keeping it high enough to cover emulators where
@@ -175,22 +183,8 @@ static ssize_t show_feedback_ctrs(struct kobject *kobj,
}
define_one_cppc_ro(feedback_ctrs);
-static struct attribute *cppc_attrs[] = {
- &feedback_ctrs.attr,
- &reference_perf.attr,
- &wraparound_time.attr,
- &highest_perf.attr,
- &lowest_perf.attr,
- &lowest_nonlinear_perf.attr,
- &nominal_perf.attr,
- &nominal_freq.attr,
- &lowest_freq.attr,
- NULL
-};
-
static struct kobj_type cppc_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
- .default_attrs = cppc_attrs,
};
static int check_pcc_chan(int pcc_ss_id, bool chk_err_bit)
@@ -688,6 +682,89 @@ static bool is_cppc_supported(int revision, int num_ent)
* }
*/
+static bool is_buf_only(int reg_idx)
+{
+ switch (reg_idx) {
+ case HIGHEST_PERF:
+ case NOMINAL_PERF:
+ case LOW_NON_LINEAR_PERF:
+ case LOWEST_PERF:
+ case CTR_WRAP_TIME:
+ case AUTO_SEL_ENABLE:
+ case REFERENCE_PERF:
+ return false;
+ default:
+ return true;
+ }
+}
+
+#define REG_SUPPORTED(cpc, idx) (is_buf_only(idx) ? \
+ CPC_SUP_BUFFER_ONLY(&cpc->cpc_regs[idx]) : \
+ CPC_SUPPORTED(&cpc->cpc_regs[idx]))
+
+static int is_mandatory_reg(int reg_idx)
+{
+ switch (reg_idx) {
+ case HIGHEST_PERF:
+ case NOMINAL_PERF:
+ case LOW_NON_LINEAR_PERF:
+ case LOWEST_PERF:
+ case REFERENCE_CTR:
+ case DELIVERED_CTR:
+ return 1;
+ }
+
+ return 0;
+}
+
+#define MANDATORY_REG_CNT 6
+
+static int set_cppc_attrs(struct cpc_desc *cpc, int entries)
+{
+ int i, attr_i = 0, opt_reg_cnt;
+ static struct attribute **cppc_attrs;
+
+ cppc_attrs = kcalloc(entries, sizeof(*cppc_attrs), GFP_KERNEL);
+ if (!cppc_attrs)
+ return -ENOMEM;
+
+ /* Set optional regs */
+ opt_reg_cnt = entries - MANDATORY_REG_CNT;
+ for (i = 0; i < MAX_CPC_REG_ENT && attr_i < opt_reg_cnt; i++) {
+ if (is_mandatory_reg(i) || !REG_SUPPORTED(cpc, i))
+ continue;
+
+ switch (i) {
+ case NOMINAL_FREQ:
+ cppc_attrs[attr_i++] = &nominal_freq.attr;
+ break;
+ case LOWEST_FREQ:
+ cppc_attrs[attr_i++] = &lowest_freq.attr;
+ break;
+ case REFERENCE_PERF:
+ cppc_attrs[attr_i++] = &reference_perf.attr;
+ break;
+ case CTR_WRAP_TIME:
+ cppc_attrs[attr_i++] = &wraparound_time.attr;
+ break;
+ }
+ }
+
+ /* Set mandatory regs */
+ cppc_attrs[attr_i++] = &highest_perf.attr;
+ cppc_attrs[attr_i++] = &nominal_perf.attr;
+ cppc_attrs[attr_i++] = &lowest_nonlinear_perf.attr;
+ cppc_attrs[attr_i++] = &lowest_perf.attr;
+
+ /* Set feedback_ctr sysfs entry */
+ cppc_attrs[attr_i] = &feedback_ctrs.attr;
+
+ /* Set kobj_type member */
+ cppc_ktype.default_attrs = cppc_attrs;
+
+ return 0;
+}
+
/**
* acpi_cppc_processor_probe - Search for per CPU _CPC objects.
* @pr: Ptr to acpi_processor containing this CPU's logical ID.
@@ -842,6 +919,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
/* Plug PSD data into this CPU's CPC descriptor. */
per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr;
+ ret = set_cppc_attrs(cpc_ptr, num_ent - 2);
+ if (ret)
+ goto out_free;
+
ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj,
"acpi_cppc");
if (ret) {
@@ -904,6 +985,7 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr)
iounmap(addr);
}
+ kfree(cppc_ktype.default_attrs);
kobject_put(&cpc_ptr->kobj);
kfree(cpc_ptr);
}
@@ -1243,26 +1325,53 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs);
/**
- * cppc_set_perf - Set a CPU's performance controls.
- * @cpu: CPU for which to set performance controls.
- * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h
+ * cppc_set_reg - Set the CPUs control register.
+ * @cpu: CPU for which to set the register.
+ * @ctrls: ptr to cppc_ctrls. See cppc_acpi.h
+ * @reg_idx: Index of the register being accessed
*
* Return: 0 for success, -ERRNO otherwise.
*/
-int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
+int cppc_set_reg(int cpu, struct cppc_ctrls *ctrls,
+ enum cppc_regs reg_idx)
{
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
- struct cpc_register_resource *desired_reg;
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
struct cppc_pcc_data *pcc_ss_data = NULL;
+ struct cpc_register_resource *reg;
int ret = 0;
+ u32 value;
if (!cpc_desc) {
pr_debug("No CPC descriptor for CPU:%d\n", cpu);
return -ENODEV;
}
- desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+ switch (reg_idx) {
+ case ENABLE:
+ value = ctrls->enable;
+ break;
+ case DESIRED_PERF:
+ value = ctrls->desired_perf;
+ break;
+ case MAX_PERF:
+ value = ctrls->max_perf;
+ break;
+ case MIN_PERF:
+ value = ctrls->min_perf;
+ break;
+ case ENERGY_PERF:
+ value = ctrls->energy_perf;
+ break;
+ case AUTO_SEL_ENABLE:
+ value = ctrls->auto_sel_enable;
+ break;
+ default:
+ pr_debug("CPC register index #%d not writeable\n", reg_idx);
+ return -EINVAL;
+ }
+
+ reg = &cpc_desc->cpc_regs[reg_idx];
/*
* This is Phase-I where we want to write to CPC registers
@@ -1271,7 +1380,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
* Since read_lock can be acquired by multiple CPUs simultaneously we
* achieve that goal here
*/
- if (CPC_IN_PCC(desired_reg)) {
+ if (CPC_IN_PCC(reg)) {
if (pcc_ss_id < 0) {
pr_debug("Invalid pcc_ss_id\n");
return -ENODEV;
@@ -1294,18 +1403,15 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
cpc_desc->write_cmd_status = 0;
}
- /*
- * Skip writing MIN/MAX until Linux knows how to come up with
- * useful values.
- */
- cpc_write(cpu, desired_reg, perf_ctrls->desired_perf);
+ if (CPC_SUPPORTED(reg))
+ cpc_write(cpu, reg, value);
- if (CPC_IN_PCC(desired_reg))
+ if (CPC_IN_PCC(reg))
up_read(&pcc_ss_data->pcc_lock); /* END Phase-I */
/*
* This is Phase-II where we transfer the ownership of PCC to Platform
*
- * Short Summary: Basically if we think of a group of cppc_set_perf
+ * Short Summary: Basically if we think of a group of cppc_set_reg
* requests that happened in short overlapping interval. The last CPU to
* come out of Phase-I will enter Phase-II and ring the doorbell.
*
@@ -1348,7 +1454,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
* case during a CMD_READ and if there are pending writes it delivers
* the write command before servicing the read command
*/
- if (CPC_IN_PCC(desired_reg)) {
+ if (CPC_IN_PCC(reg)) {
if (down_write_trylock(&pcc_ss_data->pcc_lock)) {/* BEGIN Phase-II */
/* Update only if there are pending write commands */
if (pcc_ss_data->pending_pcc_write_cmd)
@@ -1364,7 +1470,83 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
}
return ret;
}
-EXPORT_SYMBOL_GPL(cppc_set_perf);
+EXPORT_SYMBOL_GPL(cppc_set_reg);
+
+int cppc_get_ctrls(int cpu, struct cppc_ctrls *ctrls)
+{
+ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
+ struct cpc_register_resource *desired_reg, *max_reg, *min_reg;
+ struct cpc_register_resource *energy_reg, *auto_sel_enable_reg;
+ struct cpc_register_resource *enable_reg;
+ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
+ u64 desired, max = 0, min = 0, energy = 0;
+ u64 auto_sel_enable = 0, enable = 0;
+ struct cppc_pcc_data *pcc_ss_data = NULL;
+ int ret = 0, regs_in_pcc = 0;
+
+ if (!cpc_desc) {
+ pr_debug("No CPC descriptor for CPU: %d\n", cpu);
+ return -ENODEV;
+ }
+
+ enable_reg = &cpc_desc->cpc_regs[ENABLE];
+ desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+ max_reg = &cpc_desc->cpc_regs[MAX_PERF];
+ min_reg = &cpc_desc->cpc_regs[MIN_PERF];
+ energy_reg = &cpc_desc->cpc_regs[ENERGY_PERF];
+ auto_sel_enable_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE];
+
+ /* Check if any of the perf registers are in PCC */
+ if (CPC_IN_PCC(desired_reg) || CPC_IN_PCC(max_reg) ||
+ CPC_IN_PCC(min_reg) || CPC_IN_PCC(energy_reg) ||
+ CPC_IN_PCC(auto_sel_enable_reg) || CPC_IN_PCC(enable_reg)) {
+ pcc_ss_data = pcc_data[pcc_ss_id];
+ down_write(&pcc_ss_data->pcc_lock);
+ regs_in_pcc = 1;
+
+ /* Ring doorbell once to update PCC subspace */
+ if (send_pcc_cmd(pcc_ss_id, CMD_READ) < 0) {
+ ret = -EIO;
+ goto out_err;
+ }
+ }
+
+ /* desired_perf is the only mandatory value in ctrls */
+ if (cpc_read(cpu, desired_reg, &desired))
+ ret = -EFAULT;
+
+ if (CPC_SUP_BUFFER_ONLY(enable_reg) &&
+ cpc_read(cpu, enable_reg, &enable))
+ ret = -EFAULT;
+
+ if (CPC_SUP_BUFFER_ONLY(max_reg) && cpc_read(cpu, max_reg, &max))
+ ret = -EFAULT;
+
+ if (CPC_SUP_BUFFER_ONLY(min_reg) && cpc_read(cpu, min_reg, &min))
+ ret = -EFAULT;
+
+ if (CPC_SUP_BUFFER_ONLY(energy_reg) &&
+ cpc_read(cpu, energy_reg, &energy))
+ ret = -EFAULT;
+
+ if (CPC_SUPPORTED(auto_sel_enable_reg) &&
+ cpc_read(cpu, auto_sel_enable_reg, &auto_sel_enable))
+ ret = -EFAULT;
+
+ if (!ret) {
+ ctrls->enable = enable;
+ ctrls->desired_perf = desired;
+ ctrls->max_perf = max;
+ ctrls->min_perf = min;
+ ctrls->energy_perf = energy;
+ ctrls->auto_sel_enable = auto_sel_enable;
+ }
+
+out_err:
+ if (regs_in_pcc)
+ up_write(&pcc_ss_data->pcc_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cppc_get_ctrls);
/**
* cppc_get_transition_latency - returns frequency transition latency in ns
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 399526289320..c03cc6d14bad 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -34,6 +34,20 @@ config X86_PCC_CPUFREQ
If in doubt, say N.
+config X86_AMD_CPUFREQ
+ tristate "AMD CPUFreq driver"
+ depends on ACPI_PROCESSOR
+ select ACPI_CPPC_LIB
+ help
+ This adds a CPUFreq driver which uses CPPC methods
+ as described in the ACPI v6.1 spec for newer (>= Fam17h)
+ AMD processors.
+
+ When this driver is enabled, it is preferred over
+ the acpi-cpufreq driver.
+
+ If in doubt, say N.
+
config X86_ACPI_CPUFREQ
tristate "ACPI Processor P-States driver"
depends on ACPI_PROCESSOR
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index f6670c4abbb0..4562b53ad4c9 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -22,8 +22,10 @@ obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
# K8 systems. This is still the case but acpi-cpufreq errors out so that
# powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers.
-# speedstep-* is preferred over p4-clockmod.
+# speedstep-* is preferred over p4-clockmod. amd-cpufreq is preferred to acpi-cpufreq
+# for Fam17h or newer AMD processors. For others, acpi-cpufreq will be used.
+obj-$(CONFIG_X86_AMD_CPUFREQ) += amd-cpufreq.o
obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o
diff --git a/drivers/cpufreq/amd-cpufreq.c b/drivers/cpufreq/amd-cpufreq.c
new file mode 100644
index 000000000000..dc92b716d39e
--- /dev/null
+++ b/drivers/cpufreq/amd-cpufreq.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD CPUFREQ driver for Family 17h or greater AMD processors.
+ *
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Author: Janakarajan Natarajan <janakarajan.natarajan@amd.com>
+ *
+ * Additional ITMT code:
+ * (C) Copyright 2012 Intel Corporation
+ * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
+ *
+ */
+#define pr_fmt(fmt) "AMD Cpufreq: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/vmalloc.h>
+#include <linux/cpufreq.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+
+#include <asm/unaligned.h>
+
+#include <acpi/cppc_acpi.h>
+
+struct amd_desc {
+ int cpu_id;
+ struct cppc_ctrls ctrls;
+ struct kobject kobj;
+};
+
+struct amd_desc **all_cpu_data;
+
+static unsigned int cppc_enable;
+static unsigned int itmt_enable;
+module_param(cppc_enable, uint, 0644);
+module_param(itmt_enable, uint, 0644);
+MODULE_PARM_DESC(cppc_enable,
+ "1 - enable AMD CpuFreq, create CPPC sysfs entries.");
+MODULE_PARM_DESC(itmt_enable,
+ "2 - enable preferred cores based on CPPC information");
+
+#define to_amd_desc(a) container_of(a, struct amd_desc, kobj)
+
+#define show_func(access_fn, struct_name, member_name) \
+ static ssize_t show_##member_name(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *buf) \
+ { \
+ struct amd_desc *desc = to_amd_desc(kobj); \
+ struct struct_name st_name = {0}; \
+ int ret; \
+ \
+ ret = access_fn(desc->cpu_id, &st_name); \
+ if (ret) \
+ return ret; \
+ \
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", \
+ (u64)st_name.member_name); \
+ } \
+
+#define store_func(struct_name, member_name, reg_idx) \
+ static ssize_t store_##member_name(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t count)\
+ { \
+ struct amd_desc *desc = to_amd_desc(kobj); \
+ struct struct_name st_name = {0}; \
+ u32 val; \
+ int ret; \
+ \
+ ret = kstrtou32(buf, 0, &val); \
+ if (ret) \
+ return ret; \
+ \
+ st_name.member_name = val; \
+ \
+ ret = cppc_set_reg(desc->cpu_id, &st_name, reg_idx); \
+ if (ret) \
+ return ret; \
+ \
+ return count; \
+ } \
+
+#define define_one_rw(struct_name, access_fn, member_name, reg_idx) \
+ show_func(access_fn, struct_name, member_name) \
+ store_func(struct_name, member_name, reg_idx) \
+ define_one_global_rw(member_name)
+
+define_one_rw(cppc_ctrls, cppc_get_ctrls, enable, ENABLE);
+define_one_rw(cppc_ctrls, cppc_get_ctrls, max_perf, MAX_PERF);
+define_one_rw(cppc_ctrls, cppc_get_ctrls, min_perf, MIN_PERF);
+define_one_rw(cppc_ctrls, cppc_get_ctrls, desired_perf, DESIRED_PERF);
+define_one_rw(cppc_ctrls, cppc_get_ctrls, auto_sel_enable, AUTO_SEL_ENABLE);
+
+static struct attribute *amd_cpufreq_attributes[] = {
+ &enable.attr,
+ &max_perf.attr,
+ &min_perf.attr,
+ &desired_perf.attr,
+ &auto_sel_enable.attr,
+ NULL
+};
+
+static const struct attribute_group amd_cpufreq_attr_group = {
+ .attrs = amd_cpufreq_attributes,
+};
+
+static struct kobj_type amd_cpufreq_type = {
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_attrs = amd_cpufreq_attributes,
+};
+
+#ifdef CONFIG_ACPI_CPPC_LIB
+
+/* The work item is needed to avoid CPU hotplug locking issues */
+static void amd_cpufreq_sched_itmt_work_fn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+
+static DECLARE_WORK(sched_itmt_work, amd_cpufreq_sched_itmt_work_fn);
+
+static void amd_cpufreq_set_itmt_prio(int cpu)
+{
+ struct cppc_perf_caps cppc_perf;
+ static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
+ int ret;
+
+ ret = cppc_get_perf_caps(cpu, &cppc_perf);
+ if (ret)
+ return;
+
+ /*
+ * The priorities can be set regardless of whether or not
+ * sched_set_itmt_support(true) has been called and it is valid to
+ * update them at any time after it has been called.
+ */
+ pr_info("CPU %d perf %d\n", cpu, (int)cppc_perf.highest_perf);
+ sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
+
+ if (max_highest_perf <= min_highest_perf) {
+ if (cppc_perf.highest_perf > max_highest_perf)
+ max_highest_perf = cppc_perf.highest_perf;
+
+ if (cppc_perf.highest_perf < min_highest_perf)
+ min_highest_perf = cppc_perf.highest_perf;
+
+ if (max_highest_perf > min_highest_perf) {
+ /*
+ * This code can be run during CPU online under the
+ * CPU hotplug locks, so sched_set_itmt_support()
+ * cannot be called from here. Queue up a work item
+ * to invoke it.
+ */
+ pr_info("CPU %d ITMT enable\n", cpu);
+ schedule_work(&sched_itmt_work);
+ }
+ }
+}
+
+#else /* CONFIG_ACPI_CPPC_LIB */
+static void amd_cpufreq_set_itmt_prio(int cpu)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
+
+static void amd_cpufreq_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+ if (!itmt_enable)
+ return;
+
+ amd_cpufreq_set_itmt_prio(policy->cpu);
+}
+
+static int amd_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+ amd_cpufreq_init_acpi_perf_limits(policy);
+ return 0;
+}
+
+static int amd_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ return 0;
+}
+
+static int amd_cpufreq_cpu_verify(struct cpufreq_policy_data *policy_data)
+{
+ return 0;
+}
+
+static int amd_cpufreq_cpu_target_index(struct cpufreq_policy *policy,
+ unsigned int index)
+{
+ return 0;
+}
+
+static struct cpufreq_driver amd_cpufreq_driver = {
+ .name = "amd_cpufreq",
+ .init = amd_cpufreq_cpu_init,
+ .exit = amd_cpufreq_cpu_exit,
+ .verify = amd_cpufreq_cpu_verify,
+ .target_index = amd_cpufreq_cpu_target_index,
+};
+
+static void amd_cpufreq_sysfs_delete_params(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (all_cpu_data[i]) {
+ kobject_del(&all_cpu_data[i]->kobj);
+ kfree(all_cpu_data[i]);
+ }
+ }
+
+ kfree(all_cpu_data);
+}
+
+static int __init amd_cpufreq_sysfs_expose_params(void)
+{
+ struct device *cpu_dev;
+ int i, ret;
+
+ all_cpu_data = kcalloc(num_possible_cpus(), sizeof(void *),
+ GFP_KERNEL);
+
+ if (!all_cpu_data)
+ return -ENOMEM;
+
+ for_each_possible_cpu(i) {
+ all_cpu_data[i] = kzalloc(sizeof(struct amd_desc), GFP_KERNEL);
+ if (!all_cpu_data[i]) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ all_cpu_data[i]->cpu_id = i;
+ cpu_dev = get_cpu_device(i);
+ ret = kobject_init_and_add(&all_cpu_data[i]->kobj, &amd_cpufreq_type,
+ &cpu_dev->kobj, "amd_cpufreq");
+ if (ret)
+ goto free;
+ }
+
+ return 0;
+free:
+ amd_cpufreq_sysfs_delete_params();
+ return ret;
+}
+
+static int __init amd_cpufreq_init(void)
+{
+ int ret = 0;
+
+ /*
+ * Use only if:
+ * - AMD,
+ * - Family 17h (or) newer and,
+ * - Explicitly enabled
+ */
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+ boot_cpu_data.x86 < 0x17 || !cppc_enable)
+ return -ENODEV;
+
+ ret = cpufreq_register_driver(&amd_cpufreq_driver);
+ if (ret) {
+ pr_info("Failed to register driver\n");
+ goto out;
+ }
+
+ ret = amd_cpufreq_sysfs_expose_params();
+ if (ret) {
+ pr_info("Could not create sysfs entries\n");
+ cpufreq_unregister_driver(&amd_cpufreq_driver);
+ goto out;
+ }
+
+ pr_info("Using amd-cpufreq driver\n");
+ return ret;
+
+out:
+ return ret;
+}
+
+static void __exit amd_cpufreq_exit(void)
+{
+ amd_cpufreq_sysfs_delete_params();
+ cpufreq_unregister_driver(&amd_cpufreq_driver);
+}
+
+static const struct acpi_device_id amd_acpi_ids[] __used = {
+ {ACPI_PROCESSOR_DEVICE_HID, },
+ {}
+};
+
+device_initcall(amd_cpufreq_init);
+module_exit(amd_cpufreq_exit);
+MODULE_DEVICE_TABLE(acpi, amd_acpi_ids);
+
+MODULE_AUTHOR("Janakarajan Natarajan");
+MODULE_DESCRIPTION("AMD CPUFreq driver based on ACPI CPPC v6.1 spec");
+MODULE_LICENSE("GPL");
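
A note on using the new driver: the per-CPU controls land under an "amd_cpufreq" kobject on each CPU device, so the sysfs path below is inferred from the kobject_init_and_add() call above, and it assumes the module was loaded with cppc_enable=1. A minimal userspace sketch for writing a desired performance level:

/* Sketch: write a raw CPPC desired-perf value for CPU 0. The path is
 * inferred from kobject_init_and_add(..., &cpu_dev->kobj, "amd_cpufreq"). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/amd_cpufreq/desired_perf", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%u\n", 128); /* value is in abstract CPPC performance units */
	return fclose(f) ? 1 : 0;
}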
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 257d726a4456..dcff2eb8ed51 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -212,7 +212,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
freqs.new = target_freq;
cpufreq_freq_transition_begin(policy, &freqs);
- ret = cppc_set_perf(cpu->cpu, &cpu->perf_ctrls);
+ ret = cppc_set_reg(cpu->cpu, &cpu->perf_ctrls, DESIRED_PERF);
cpufreq_freq_transition_end(policy, &freqs, ret != 0);
if (ret)
@@ -236,7 +236,7 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
cpu->perf_ctrls.desired_perf = cpu->perf_caps.lowest_perf;
- ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
+ ret = cppc_set_reg(cpu_num, &cpu->perf_ctrls, DESIRED_PERF);
if (ret)
pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
cpu->perf_caps.lowest_perf, cpu_num, ret);
@@ -356,7 +356,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
cpu->perf_caps.highest_perf);
cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
- ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
+ ret = cppc_set_reg(cpu_num, &cpu->perf_ctrls, DESIRED_PERF);
if (ret)
pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
cpu->perf_caps.highest_perf, cpu_num, ret);
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index a6a9373ab863..e6cd2a487874 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -109,10 +109,13 @@ struct cppc_perf_caps {
u32 nominal_freq;
};
-struct cppc_perf_ctrls {
+struct cppc_ctrls {
+ bool enable;
u32 max_perf;
u32 min_perf;
u32 desired_perf;
+ u32 auto_sel_enable;
+ u32 energy_perf;
};
struct cppc_perf_fb_ctrs {
@@ -126,16 +129,18 @@ struct cppc_perf_fb_ctrs {
struct cppc_cpudata {
int cpu;
struct cppc_perf_caps perf_caps;
- struct cppc_perf_ctrls perf_ctrls;
+ struct cppc_ctrls ctrls;
struct cppc_perf_fb_ctrs perf_fb_ctrs;
struct cpufreq_policy *cur_policy;
unsigned int shared_type;
cpumask_var_t shared_cpu_map;
};
+extern int cppc_get_enable(int cpu);
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
-extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
+extern int cppc_set_reg(int cpu, struct cppc_ctrls *ctrls, enum cppc_regs reg_idx);
+extern int cppc_get_ctrls(int cpu, struct cppc_ctrls *ctrls);
extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
extern int acpi_get_psd_map(struct cppc_cpudata **);
extern unsigned int cppc_get_transition_latency(int cpu);
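
To summarize the reworked interface: cppc_set_reg() writes a single register, selected by reg_idx and sourced from the matching cppc_ctrls field, while cppc_get_ctrls() reads back all supported controls at once. A minimal hypothetical in-kernel caller (illustrative only, not part of the patch):

#include <linux/types.h>
#include <acpi/cppc_acpi.h>

/* Hypothetical helper: clamp @cpu to @max_perf without disturbing the
 * other control registers. */
static int example_set_max_perf(int cpu, u32 max_perf)
{
	struct cppc_ctrls ctrls = {0};
	int ret;

	ret = cppc_get_ctrls(cpu, &ctrls);	/* snapshot current controls */
	if (ret)
		return ret;

	ctrls.max_perf = max_perf;
	/* Only the register named by the index (MAX_PERF) is written. */
	return cppc_set_reg(cpu, &ctrls, MAX_PERF);
}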
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ffbd9a3d78d8..063c7ea1b3ad 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -484,6 +484,8 @@ static const struct x86_cpu_id snc_cpu[] = {
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
+ return true;
+
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
/* Do not match if we do not have a valid APICID for cpu: */
@@ -650,7 +652,7 @@ void set_cpu_sibling_map(int cpu)
/* maps the cpu to the sched domain representing multi-core */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
- return cpu_llc_shared_mask(cpu);
+ return cpu_llc_shared_mask(0);
}
static void impress_friends(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2142c6767682..51b9e3c36e9c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2352,7 +2352,8 @@ void wake_up_if_idle(int cpu)
bool cpus_share_cache(int this_cpu, int that_cpu)
{
- return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
+ return true;
+ //return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}
static inline bool ttwu_queue_cond(int cpu, int wake_flags)
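
The last two hunks are the actual scheduler hacks: cpus_share_cache() is forced to report a shared last-level cache for every CPU pair, and cpu_coregroup_mask() hands every CPU the LLC mask of CPU 0, which collapses the separate CCX cache domains on Zen into one big MC domain. One way to eyeball the result from userspace, assuming CONFIG_SCHED_DEBUG is enabled (the /proc/sys/kernel/sched_domain path is an assumption for kernels of this era):

/* Sketch: print CPU 0's sched-domain names; with the hacks applied the
 * lowest domain should span CPUs that used to sit in separate LLC groups. */
#include <stdio.h>

int main(void)
{
	char path[128], name[64];
	int d;

	for (d = 0; d < 8; d++) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/proc/sys/kernel/sched_domain/cpu0/domain%d/name", d);
		f = fopen(path, "r");
		if (!f)
			break;
		if (fgets(name, sizeof(name), f))
			printf("domain%d: %s", d, name);
		fclose(f);
	}
	return 0;
}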

rxrbln commented Aug 29, 2023

Thank you for your effort! AFAICS, removing the cache domains also schedules threads on SMT siblings instead of on other idle CPU cores with their full resources.
