1/*
2 * S390 Version
3 *   Copyright IBM Corp. 2002, 2011
4 *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
5 *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6 *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7 *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
8 *
9 * @remark Copyright 2002-2011 OProfile authors
10 */
11
12#include <linux/oprofile.h>
13#include <linux/perf_event.h>
14#include <linux/init.h>
15#include <linux/errno.h>
16#include <linux/fs.h>
17#include <linux/module.h>
18#include <asm/processor.h>
19
20#include "../../../drivers/oprofile/oprof.h"
21
22extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
23
24#include "hwsampler.h"
25#include "op_counter.h"
26
/* Initial value of oprofile_hw_interval before it is clamped to the hardware range. */
#define DEFAULT_INTERVAL	4127518

/* Default block counts handed to hwsampler_allocate(). */
#define DEFAULT_SDBT_BLOCKS	1
#define DEFAULT_SDB_BLOCKS	511

/*
 * Sampling interval passed to hwsampler_start_all(), and the limits it
 * is clamped to.  The limits are filled in by oprofile_hwsampler_init().
 */
static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
static unsigned long oprofile_min_interval;
static unsigned long oprofile_max_interval;

/* Block counts passed to hwsampler_allocate() when sampling is started. */
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;

static int hwsampler_enabled;	/* hardware sampling selected via oprofilefs */
static int hwsampler_running;	/* start_mutex must be held to change */
static int hwsampler_available;	/* nonzero if oprofile_hwsampler_init() succeeded */

/* Copy of the operations as set up by oprofile_timer_init(). */
static struct oprofile_operations timer_ops;

/* Its kernel and user members are exposed through the "0/kernel" and "0/user" files. */
struct op_counter_config counter_config;
46
/*
 * Values of force_cpu_type.  "timer" is selected through the cpu_type
 * module parameter; "reserved" leaves the detected cpu type alone.
 */
enum __force_cpu_type {
	reserved = 0,		/* do not force */
	timer = 1,		/* always report "timer" as cpu type */
};

/* Holds one of the enum __force_cpu_type values. */
static int force_cpu_type;
52
53static int set_cpu_type(const char *str, struct kernel_param *kp)
54{
55	if (!strcmp(str, "timer")) {
56		force_cpu_type = timer;
57		printk(KERN_INFO "oprofile: forcing timer to be returned "
58		                 "as cpu type\n");
59	} else {
60		force_cpu_type = 0;
61	}
62
63	return 0;
64}
65module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
66MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
67		           "(report cpu_type \"timer\"");
68
69static int __oprofile_hwsampler_start(void)
70{
71	int retval;
72
73	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
74	if (retval)
75		return retval;
76
77	retval = hwsampler_start_all(oprofile_hw_interval);
78	if (retval)
79		hwsampler_deallocate();
80
81	return retval;
82}
83
84static int oprofile_hwsampler_start(void)
85{
86	int retval;
87
88	hwsampler_running = hwsampler_enabled;
89
90	if (!hwsampler_running)
91		return timer_ops.start();
92
93	retval = perf_reserve_sampling();
94	if (retval)
95		return retval;
96
97	retval = __oprofile_hwsampler_start();
98	if (retval)
99		perf_release_sampling();
100
101	return retval;
102}
103
104static void oprofile_hwsampler_stop(void)
105{
106	if (!hwsampler_running) {
107		timer_ops.stop();
108		return;
109	}
110
111	hwsampler_stop_all();
112	hwsampler_deallocate();
113	perf_release_sampling();
114	return;
115}
116
117/*
118 * File ops used for:
119 * /dev/oprofile/0/enabled
120 * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
121 */
122
123static ssize_t hwsampler_read(struct file *file, char __user *buf,
124		size_t count, loff_t *offset)
125{
126	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
127}
128
129static ssize_t hwsampler_write(struct file *file, char const __user *buf,
130		size_t count, loff_t *offset)
131{
132	unsigned long val;
133	int retval;
134
135	if (*offset)
136		return -EINVAL;
137
138	retval = oprofilefs_ulong_from_user(&val, buf, count);
139	if (retval <= 0)
140		return retval;
141
142	if (val != 0 && val != 1)
143		return -EINVAL;
144
145	if (oprofile_started)
146		/*
147		 * save to do without locking as we set
148		 * hwsampler_running in start() when start_mutex is
149		 * held
150		 */
151		return -EBUSY;
152
153	hwsampler_enabled = val;
154
155	return count;
156}
157
158static const struct file_operations hwsampler_fops = {
159	.read		= hwsampler_read,
160	.write		= hwsampler_write,
161};
162
163/*
164 * File ops used for:
165 * /dev/oprofile/0/count
166 * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
167 *
168 * Make sure that the value is within the hardware range.
169 */
170
171static ssize_t hw_interval_read(struct file *file, char __user *buf,
172				size_t count, loff_t *offset)
173{
174	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
175					count, offset);
176}
177
178static ssize_t hw_interval_write(struct file *file, char const __user *buf,
179				 size_t count, loff_t *offset)
180{
181	unsigned long val;
182	int retval;
183
184	if (*offset)
185		return -EINVAL;
186	retval = oprofilefs_ulong_from_user(&val, buf, count);
187	if (retval <= 0)
188		return retval;
189	if (val < oprofile_min_interval)
190		oprofile_hw_interval = oprofile_min_interval;
191	else if (val > oprofile_max_interval)
192		oprofile_hw_interval = oprofile_max_interval;
193	else
194		oprofile_hw_interval = val;
195
196	return count;
197}
198
199static const struct file_operations hw_interval_fops = {
200	.read		= hw_interval_read,
201	.write		= hw_interval_write,
202};
203
204/*
205 * File ops used for:
206 * /dev/oprofile/0/event
207 * Only a single event with number 0 is supported with this counter.
208 *
209 * /dev/oprofile/0/unit_mask
210 * This is a dummy file needed by the user space tools.
211 * No value other than 0 is accepted or returned.
212 */
213
214static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
215				    size_t count, loff_t *offset)
216{
217	return oprofilefs_ulong_to_user(0, buf, count, offset);
218}
219
220static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
221				     size_t count, loff_t *offset)
222{
223	unsigned long val;
224	int retval;
225
226	if (*offset)
227		return -EINVAL;
228
229	retval = oprofilefs_ulong_from_user(&val, buf, count);
230	if (retval <= 0)
231		return retval;
232	if (val != 0)
233		return -EINVAL;
234	return count;
235}
236
237static const struct file_operations zero_fops = {
238	.read		= hwsampler_zero_read,
239	.write		= hwsampler_zero_write,
240};
241
242/* /dev/oprofile/0/kernel file ops.  */
243
244static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
245				     size_t count, loff_t *offset)
246{
247	return oprofilefs_ulong_to_user(counter_config.kernel,
248					buf, count, offset);
249}
250
251static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
252				      size_t count, loff_t *offset)
253{
254	unsigned long val;
255	int retval;
256
257	if (*offset)
258		return -EINVAL;
259
260	retval = oprofilefs_ulong_from_user(&val, buf, count);
261	if (retval <= 0)
262		return retval;
263
264	if (val != 0 && val != 1)
265		return -EINVAL;
266
267	counter_config.kernel = val;
268
269	return count;
270}
271
272static const struct file_operations kernel_fops = {
273	.read		= hwsampler_kernel_read,
274	.write		= hwsampler_kernel_write,
275};
276
277/* /dev/oprofile/0/user file ops. */
278
279static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
280				   size_t count, loff_t *offset)
281{
282	return oprofilefs_ulong_to_user(counter_config.user,
283					buf, count, offset);
284}
285
286static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
287				    size_t count, loff_t *offset)
288{
289	unsigned long val;
290	int retval;
291
292	if (*offset)
293		return -EINVAL;
294
295	retval = oprofilefs_ulong_from_user(&val, buf, count);
296	if (retval <= 0)
297		return retval;
298
299	if (val != 0 && val != 1)
300		return -EINVAL;
301
302	counter_config.user = val;
303
304	return count;
305}
306
307static const struct file_operations user_fops = {
308	.read		= hwsampler_user_read,
309	.write		= hwsampler_user_write,
310};
311
312
313/*
314 * File ops used for: /dev/oprofile/timer/enabled
315 * The value always has to be the inverted value of hwsampler_enabled. So
316 * no separate variable is created. That way we do not need locking.
317 */
318
319static ssize_t timer_enabled_read(struct file *file, char __user *buf,
320				  size_t count, loff_t *offset)
321{
322	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
323}
324
325static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
326				   size_t count, loff_t *offset)
327{
328	unsigned long val;
329	int retval;
330
331	if (*offset)
332		return -EINVAL;
333
334	retval = oprofilefs_ulong_from_user(&val, buf, count);
335	if (retval <= 0)
336		return retval;
337
338	if (val != 0 && val != 1)
339		return -EINVAL;
340
341	/* Timer cannot be disabled without having hardware sampling.  */
342	if (val == 0 && !hwsampler_available)
343		return -EINVAL;
344
345	if (oprofile_started)
346		/*
347		 * save to do without locking as we set
348		 * hwsampler_running in start() when start_mutex is
349		 * held
350		 */
351		return -EBUSY;
352
353	hwsampler_enabled = !val;
354
355	return count;
356}
357
358static const struct file_operations timer_enabled_fops = {
359	.read		= timer_enabled_read,
360	.write		= timer_enabled_write,
361};
362
363
364static int oprofile_create_hwsampling_files(struct dentry *root)
365{
366	struct dentry *dir;
367
368	dir = oprofilefs_mkdir(root, "timer");
369	if (!dir)
370		return -EINVAL;
371
372	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
373
374	if (!hwsampler_available)
375		return 0;
376
377	/* reinitialize default values */
378	hwsampler_enabled = 1;
379	counter_config.kernel = 1;
380	counter_config.user = 1;
381
382	if (!force_cpu_type) {
383		/*
384		 * Create the counter file system.  A single virtual
385		 * counter is created which can be used to
386		 * enable/disable hardware sampling dynamically from
387		 * user space.  The user space will configure a single
388		 * counter with a single event.  The value of 'event'
389		 * and 'unit_mask' are not evaluated by the kernel code
390		 * and can only be set to 0.
391		 */
392
393		dir = oprofilefs_mkdir(root, "0");
394		if (!dir)
395			return -EINVAL;
396
397		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
398		oprofilefs_create_file(dir, "event", &zero_fops);
399		oprofilefs_create_file(dir, "count", &hw_interval_fops);
400		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
401		oprofilefs_create_file(dir, "kernel", &kernel_fops);
402		oprofilefs_create_file(dir, "user", &user_fops);
403		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
404					&oprofile_sdbt_blocks);
405
406	} else {
407		/*
408		 * Hardware sampling can be used but the cpu_type is
409		 * forced to timer in order to deal with legacy user
410		 * space tools.  The /dev/oprofile/hwsampling fs is
411		 * provided in that case.
412		 */
413		dir = oprofilefs_mkdir(root, "hwsampling");
414		if (!dir)
415			return -EINVAL;
416
417		oprofilefs_create_file(dir, "hwsampler",
418				       &hwsampler_fops);
419		oprofilefs_create_file(dir, "hw_interval",
420				       &hw_interval_fops);
421		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
422					   &oprofile_min_interval);
423		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
424					   &oprofile_max_interval);
425		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
426					&oprofile_sdbt_blocks);
427	}
428	return 0;
429}
430
431static int oprofile_hwsampler_init(struct oprofile_operations *ops)
432{
433	/*
434	 * Initialize the timer mode infrastructure as well in order
435	 * to be able to switch back dynamically.  oprofile_timer_init
436	 * is not supposed to fail.
437	 */
438	if (oprofile_timer_init(ops))
439		BUG();
440
441	memcpy(&timer_ops, ops, sizeof(timer_ops));
442	ops->create_files = oprofile_create_hwsampling_files;
443
444	/*
445	 * If the user space tools do not support newer cpu types,
446	 * the force_cpu_type module parameter
447	 * can be used to always return \"timer\" as cpu type.
448	 */
449	if (force_cpu_type != timer) {
450		struct cpuid id;
451
452		get_cpu_id (&id);
453
454		switch (id.machine) {
455		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
456		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
457		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
458		default: return -ENODEV;
459		}
460	}
461
462	if (hwsampler_setup())
463		return -ENODEV;
464
465	/*
466	 * Query the range for the sampling interval from the
467	 * hardware.
468	 */
469	oprofile_min_interval = hwsampler_query_min_interval();
470	if (oprofile_min_interval == 0)
471		return -ENODEV;
472	oprofile_max_interval = hwsampler_query_max_interval();
473	if (oprofile_max_interval == 0)
474		return -ENODEV;
475
476	/* The initial value should be sane */
477	if (oprofile_hw_interval < oprofile_min_interval)
478		oprofile_hw_interval = oprofile_min_interval;
479	if (oprofile_hw_interval > oprofile_max_interval)
480		oprofile_hw_interval = oprofile_max_interval;
481
482	printk(KERN_INFO "oprofile: System z hardware sampling "
483	       "facility found.\n");
484
485	ops->start = oprofile_hwsampler_start;
486	ops->stop = oprofile_hwsampler_stop;
487
488	return 0;
489}
490
/* Counterpart of oprofile_hwsampler_init(): shuts the hardware sampler down. */
static void oprofile_hwsampler_exit(void)
{
	hwsampler_shutdown();
}
495
496int __init oprofile_arch_init(struct oprofile_operations *ops)
497{
498	ops->backtrace = s390_backtrace;
499
500	/*
501	 * -ENODEV is not reported to the caller.  The module itself
502         * will use the timer mode sampling as fallback and this is
503         * always available.
504	 */
505	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
506
507	return 0;
508}
509
/* Architecture hook: tears down via oprofile_hwsampler_exit(). */
void oprofile_arch_exit(void)
{
	oprofile_hwsampler_exit();
}
514