1/*
2 * POWERNV cpufreq driver for the IBM POWER processors
3 *
4 * (C) Copyright IBM 2014
5 *
6 * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2, or (at your option)
11 * any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 */
19
20#define pr_fmt(fmt)	"powernv-cpufreq: " fmt
21
22#include <linux/kernel.h>
23#include <linux/sysfs.h>
24#include <linux/cpumask.h>
25#include <linux/module.h>
26#include <linux/cpufreq.h>
27#include <linux/smp.h>
28#include <linux/of.h>
29#include <linux/reboot.h>
30
31#include <asm/cputhreads.h>
32#include <asm/firmware.h>
33#include <asm/reg.h>
34#include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
35
/* Upper bound on the number of pstates the frequency table can hold. */
#define POWERNV_MAX_PSTATES	256

/* Status flags tested in the PMSR value. */
#define PMSR_PSAFE_ENABLE	(1UL << 30)
#define PMSR_SPR_EM_DISABLE	(1UL << 31)

/*
 * Byte-wide field extractors for a PMSR value: PMSR_MAX() yields the
 * byte starting at bit 32, PMSR_LP() the byte starting at bit 48.
 * The argument is parenthesized so that compound expressions such as
 * "a | b" are shifted as a whole.
 */
#define PMSR_MAX(x)		(((x) >> 32) & 0xFF)
#define PMSR_LP(x)		(((x) >> 48) & 0xFF)
41
42static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
43static bool rebooting, throttled;
44
/*
 * Note: The set of pstates consists of contiguous integers. The
 * smallest one is recorded in powernv_pstate_info.min and the
 * largest one in powernv_pstate_info.max.
 *
 * The nominal pstate is the highest non-turbo pstate in this
 * platform; it is recorded in powernv_pstate_info.nominal.
 */
struct powernv_pstate_info {
	int min;	/* smallest pstate id */
	int max;	/* largest pstate id */
	int nominal;	/* highest non-turbo pstate id */
	int nr_pstates;	/* number of entries in the frequency table */
};

static struct powernv_pstate_info powernv_pstate_info;
59
60/*
61 * Initialize the freq table based on data obtained
62 * from the firmware passed via device-tree
63 */
64static int init_powernv_pstates(void)
65{
66	struct device_node *power_mgt;
67	int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0;
68	const __be32 *pstate_ids, *pstate_freqs;
69	u32 len_ids, len_freqs;
70
71	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
72	if (!power_mgt) {
73		pr_warn("power-mgt node not found\n");
74		return -ENODEV;
75	}
76
77	if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
78		pr_warn("ibm,pstate-min node not found\n");
79		return -ENODEV;
80	}
81
82	if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
83		pr_warn("ibm,pstate-max node not found\n");
84		return -ENODEV;
85	}
86
87	if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
88				 &pstate_nominal)) {
89		pr_warn("ibm,pstate-nominal not found\n");
90		return -ENODEV;
91	}
92	pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
93		pstate_nominal, pstate_max);
94
95	pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
96	if (!pstate_ids) {
97		pr_warn("ibm,pstate-ids not found\n");
98		return -ENODEV;
99	}
100
101	pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
102				      &len_freqs);
103	if (!pstate_freqs) {
104		pr_warn("ibm,pstate-frequencies-mhz not found\n");
105		return -ENODEV;
106	}
107
108	if (len_ids != len_freqs) {
109		pr_warn("Entries in ibm,pstate-ids and "
110			"ibm,pstate-frequencies-mhz does not match\n");
111	}
112
113	nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
114	if (!nr_pstates) {
115		pr_warn("No PStates found\n");
116		return -ENODEV;
117	}
118
119	pr_debug("NR PStates %d\n", nr_pstates);
120	for (i = 0; i < nr_pstates; i++) {
121		u32 id = be32_to_cpu(pstate_ids[i]);
122		u32 freq = be32_to_cpu(pstate_freqs[i]);
123
124		pr_debug("PState id %d freq %d MHz\n", id, freq);
125		powernv_freqs[i].frequency = freq * 1000; /* kHz */
126		powernv_freqs[i].driver_data = id;
127	}
128	/* End of list marker entry */
129	powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
130
131	powernv_pstate_info.min = pstate_min;
132	powernv_pstate_info.max = pstate_max;
133	powernv_pstate_info.nominal = pstate_nominal;
134	powernv_pstate_info.nr_pstates = nr_pstates;
135
136	return 0;
137}
138
139/* Returns the CPU frequency corresponding to the pstate_id. */
140static unsigned int pstate_id_to_freq(int pstate_id)
141{
142	int i;
143
144	i = powernv_pstate_info.max - pstate_id;
145	if (i >= powernv_pstate_info.nr_pstates || i < 0) {
146		pr_warn("PState id %d outside of PState table, "
147			"reporting nominal id %d instead\n",
148			pstate_id, powernv_pstate_info.nominal);
149		i = powernv_pstate_info.max - powernv_pstate_info.nominal;
150	}
151
152	return powernv_freqs[i].frequency;
153}
154
155/*
156 * cpuinfo_nominal_freq_show - Show the nominal CPU frequency as indicated by
157 * the firmware
158 */
159static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy,
160					char *buf)
161{
162	return sprintf(buf, "%u\n",
163		pstate_id_to_freq(powernv_pstate_info.nominal));
164}
165
166struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
167	__ATTR_RO(cpuinfo_nominal_freq);
168
169static struct freq_attr *powernv_cpu_freq_attr[] = {
170	&cpufreq_freq_attr_scaling_available_freqs,
171	&cpufreq_freq_attr_cpuinfo_nominal_freq,
172	NULL,
173};
174
175/* Helper routines */
176
177/* Access helpers to power mgt SPR */
178
179static inline unsigned long get_pmspr(unsigned long sprn)
180{
181	switch (sprn) {
182	case SPRN_PMCR:
183		return mfspr(SPRN_PMCR);
184
185	case SPRN_PMICR:
186		return mfspr(SPRN_PMICR);
187
188	case SPRN_PMSR:
189		return mfspr(SPRN_PMSR);
190	}
191	BUG();
192}
193
194static inline void set_pmspr(unsigned long sprn, unsigned long val)
195{
196	switch (sprn) {
197	case SPRN_PMCR:
198		mtspr(SPRN_PMCR, val);
199		return;
200
201	case SPRN_PMICR:
202		mtspr(SPRN_PMICR, val);
203		return;
204	}
205	BUG();
206}
207
/*
 * Argument block exchanged with the functions that are run on a
 * remote CPU via smp_call_function to query or update its pstate.
 */
struct powernv_smp_call_data {
	/* Frequency filled in by powernv_read_cpu_freq(). */
	unsigned int freq;
	/* Pstate id: filled in on a read, consumed by set_pstate(). */
	int pstate_id;
};
216
217/*
218 * powernv_read_cpu_freq: Reads the current frequency on this CPU.
219 *
220 * Called via smp_call_function.
221 *
222 * Note: The caller of the smp_call_function should pass an argument of
223 * the type 'struct powernv_smp_call_data *' along with this function.
224 *
225 * The current frequency on this CPU will be returned via
226 * ((struct powernv_smp_call_data *)arg)->freq;
227 */
228static void powernv_read_cpu_freq(void *arg)
229{
230	unsigned long pmspr_val;
231	s8 local_pstate_id;
232	struct powernv_smp_call_data *freq_data = arg;
233
234	pmspr_val = get_pmspr(SPRN_PMSR);
235
236	/*
237	 * The local pstate id corresponds bits 48..55 in the PMSR.
238	 * Note: Watch out for the sign!
239	 */
240	local_pstate_id = (pmspr_val >> 48) & 0xFF;
241	freq_data->pstate_id = local_pstate_id;
242	freq_data->freq = pstate_id_to_freq(freq_data->pstate_id);
243
244	pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n",
245		raw_smp_processor_id(), pmspr_val, freq_data->pstate_id,
246		freq_data->freq);
247}
248
249/*
250 * powernv_cpufreq_get: Returns the CPU frequency as reported by the
251 * firmware for CPU 'cpu'. This value is reported through the sysfs
252 * file cpuinfo_cur_freq.
253 */
254static unsigned int powernv_cpufreq_get(unsigned int cpu)
255{
256	struct powernv_smp_call_data freq_data;
257
258	smp_call_function_any(cpu_sibling_mask(cpu), powernv_read_cpu_freq,
259			&freq_data, 1);
260
261	return freq_data.freq;
262}
263
264/*
265 * set_pstate: Sets the pstate on this CPU.
266 *
267 * This is called via an smp_call_function.
268 *
269 * The caller must ensure that freq_data is of the type
270 * (struct powernv_smp_call_data *) and the pstate_id which needs to be set
271 * on this CPU should be present in freq_data->pstate_id.
272 */
273static void set_pstate(void *freq_data)
274{
275	unsigned long val;
276	unsigned long pstate_ul =
277		((struct powernv_smp_call_data *) freq_data)->pstate_id;
278
279	val = get_pmspr(SPRN_PMCR);
280	val = val & 0x0000FFFFFFFFFFFFULL;
281
282	pstate_ul = pstate_ul & 0xFF;
283
284	/* Set both global(bits 56..63) and local(bits 48..55) PStates */
285	val = val | (pstate_ul << 56) | (pstate_ul << 48);
286
287	pr_debug("Setting cpu %d pmcr to %016lX\n",
288			raw_smp_processor_id(), val);
289	set_pmspr(SPRN_PMCR, val);
290}
291
292/*
293 * get_nominal_index: Returns the index corresponding to the nominal
294 * pstate in the cpufreq table
295 */
296static inline unsigned int get_nominal_index(void)
297{
298	return powernv_pstate_info.max - powernv_pstate_info.nominal;
299}
300
301static void powernv_cpufreq_throttle_check(unsigned int cpu)
302{
303	unsigned long pmsr;
304	int pmsr_pmax, pmsr_lp;
305
306	pmsr = get_pmspr(SPRN_PMSR);
307
308	/* Check for Pmax Capping */
309	pmsr_pmax = (s8)PMSR_MAX(pmsr);
310	if (pmsr_pmax != powernv_pstate_info.max) {
311		throttled = true;
312		pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax);
313		pr_info("Max allowed Pstate is capped\n");
314	}
315
316	/*
317	 * Check for Psafe by reading LocalPstate
318	 * or check if Psafe_mode_active is set in PMSR.
319	 */
320	pmsr_lp = (s8)PMSR_LP(pmsr);
321	if ((pmsr_lp < powernv_pstate_info.min) ||
322				(pmsr & PMSR_PSAFE_ENABLE)) {
323		throttled = true;
324		pr_info("Pstate set to safe frequency\n");
325	}
326
327	/* Check if SPR_EM_DISABLE is set in PMSR */
328	if (pmsr & PMSR_SPR_EM_DISABLE) {
329		throttled = true;
330		pr_info("Frequency Control disabled from OS\n");
331	}
332
333	if (throttled) {
334		pr_info("PMSR = %16lx\n", pmsr);
335		pr_crit("CPU Frequency could be throttled\n");
336	}
337}
338
339/*
340 * powernv_cpufreq_target_index: Sets the frequency corresponding to
341 * the cpufreq table entry indexed by new_index on the cpus in the
342 * mask policy->cpus
343 */
344static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
345					unsigned int new_index)
346{
347	struct powernv_smp_call_data freq_data;
348
349	if (unlikely(rebooting) && new_index != get_nominal_index())
350		return 0;
351
352	if (!throttled)
353		powernv_cpufreq_throttle_check(smp_processor_id());
354
355	freq_data.pstate_id = powernv_freqs[new_index].driver_data;
356
357	/*
358	 * Use smp_call_function to send IPI and execute the
359	 * mtspr on target CPU.  We could do that without IPI
360	 * if current CPU is within policy->cpus (core)
361	 */
362	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
363
364	return 0;
365}
366
367static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
368{
369	int base, i;
370
371	base = cpu_first_thread_sibling(policy->cpu);
372
373	for (i = 0; i < threads_per_core; i++)
374		cpumask_set_cpu(base + i, policy->cpus);
375
376	return cpufreq_table_validate_and_show(policy, powernv_freqs);
377}
378
379static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
380				unsigned long action, void *unused)
381{
382	int cpu;
383	struct cpufreq_policy cpu_policy;
384
385	rebooting = true;
386	for_each_online_cpu(cpu) {
387		cpufreq_get_policy(&cpu_policy, cpu);
388		powernv_cpufreq_target_index(&cpu_policy, get_nominal_index());
389	}
390
391	return NOTIFY_DONE;
392}
393
394static struct notifier_block powernv_cpufreq_reboot_nb = {
395	.notifier_call = powernv_cpufreq_reboot_notifier,
396};
397
398static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
399{
400	struct powernv_smp_call_data freq_data;
401
402	freq_data.pstate_id = powernv_pstate_info.min;
403	smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
404}
405
406static struct cpufreq_driver powernv_cpufreq_driver = {
407	.name		= "powernv-cpufreq",
408	.flags		= CPUFREQ_CONST_LOOPS,
409	.init		= powernv_cpufreq_cpu_init,
410	.verify		= cpufreq_generic_frequency_table_verify,
411	.target_index	= powernv_cpufreq_target_index,
412	.get		= powernv_cpufreq_get,
413	.stop_cpu	= powernv_cpufreq_stop_cpu,
414	.attr		= powernv_cpu_freq_attr,
415};
416
417static int __init powernv_cpufreq_init(void)
418{
419	int rc = 0;
420
421	/* Don't probe on pseries (guest) platforms */
422	if (!firmware_has_feature(FW_FEATURE_OPALv3))
423		return -ENODEV;
424
425	/* Discover pstates from device tree and init */
426	rc = init_powernv_pstates();
427	if (rc) {
428		pr_info("powernv-cpufreq disabled. System does not support PState control\n");
429		return rc;
430	}
431
432	register_reboot_notifier(&powernv_cpufreq_reboot_nb);
433	return cpufreq_register_driver(&powernv_cpufreq_driver);
434}
435module_init(powernv_cpufreq_init);
436
437static void __exit powernv_cpufreq_exit(void)
438{
439	unregister_reboot_notifier(&powernv_cpufreq_reboot_nb);
440	cpufreq_unregister_driver(&powernv_cpufreq_driver);
441}
442module_exit(powernv_cpufreq_exit);
443
444MODULE_LICENSE("GPL");
445MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");
446