1 /*
2  * S390 Version
3  *   Copyright IBM Corp. 2002, 2011
4  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
5  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7  *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
8  *
9  * @remark Copyright 2002-2011 OProfile authors
10  */
11 
12 #include <linux/oprofile.h>
13 #include <linux/perf_event.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
16 #include <linux/fs.h>
17 #include <linux/module.h>
18 #include <asm/processor.h>
19 
20 #include "../../../drivers/oprofile/oprof.h"
21 
22 extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
23 
24 #include "hwsampler.h"
25 #include "op_counter.h"
26 
/* Initial sampling interval; adjusted to the hardware range during init. */
#define DEFAULT_INTERVAL	4127518

/* Initial values of the two arguments handed to hwsampler_allocate(). */
#define DEFAULT_SDBT_BLOCKS	1
#define DEFAULT_SDB_BLOCKS	511

/* Interval passed to hwsampler_start_all(); writable via hw_interval_fops. */
static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
/* Interval limits, filled in from the hwsampler query calls during init. */
static unsigned long oprofile_min_interval;
static unsigned long oprofile_max_interval;

/* Arguments for hwsampler_allocate() whenever sampling is started. */
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;

/* Requested mode: non-zero selects hardware sampling for the next start. */
static int hwsampler_enabled;
static int hwsampler_running;	/* start_mutex must be held to change */
/* Non-zero when oprofile_hwsampler_init() returned success. */
static int hwsampler_available;

/* Copy of the operations set up by oprofile_timer_init(). */
static struct oprofile_operations timer_ops;

/* Holds the kernel/user flags read and written through the counter files. */
struct op_counter_config counter_config;

/* Possible values of force_cpu_type (set by the "cpu_type" parameter). */
enum __force_cpu_type {
	reserved = 0,		/* do not force */
	timer,
};
static int force_cpu_type;
52 
set_cpu_type(const char * str,struct kernel_param * kp)53 static int set_cpu_type(const char *str, struct kernel_param *kp)
54 {
55 	if (!strcmp(str, "timer")) {
56 		force_cpu_type = timer;
57 		printk(KERN_INFO "oprofile: forcing timer to be returned "
58 		                 "as cpu type\n");
59 	} else {
60 		force_cpu_type = 0;
61 	}
62 
63 	return 0;
64 }
65 module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
66 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
67 		           "(report cpu_type \"timer\"");
68 
__oprofile_hwsampler_start(void)69 static int __oprofile_hwsampler_start(void)
70 {
71 	int retval;
72 
73 	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
74 	if (retval)
75 		return retval;
76 
77 	retval = hwsampler_start_all(oprofile_hw_interval);
78 	if (retval)
79 		hwsampler_deallocate();
80 
81 	return retval;
82 }
83 
oprofile_hwsampler_start(void)84 static int oprofile_hwsampler_start(void)
85 {
86 	int retval;
87 
88 	hwsampler_running = hwsampler_enabled;
89 
90 	if (!hwsampler_running)
91 		return timer_ops.start();
92 
93 	retval = perf_reserve_sampling();
94 	if (retval)
95 		return retval;
96 
97 	retval = __oprofile_hwsampler_start();
98 	if (retval)
99 		perf_release_sampling();
100 
101 	return retval;
102 }
103 
oprofile_hwsampler_stop(void)104 static void oprofile_hwsampler_stop(void)
105 {
106 	if (!hwsampler_running) {
107 		timer_ops.stop();
108 		return;
109 	}
110 
111 	hwsampler_stop_all();
112 	hwsampler_deallocate();
113 	perf_release_sampling();
114 	return;
115 }
116 
117 /*
118  * File ops used for:
119  * /dev/oprofile/0/enabled
120  * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
121  */
122 
hwsampler_read(struct file * file,char __user * buf,size_t count,loff_t * offset)123 static ssize_t hwsampler_read(struct file *file, char __user *buf,
124 		size_t count, loff_t *offset)
125 {
126 	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
127 }
128 
hwsampler_write(struct file * file,char const __user * buf,size_t count,loff_t * offset)129 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
130 		size_t count, loff_t *offset)
131 {
132 	unsigned long val;
133 	int retval;
134 
135 	if (*offset)
136 		return -EINVAL;
137 
138 	retval = oprofilefs_ulong_from_user(&val, buf, count);
139 	if (retval <= 0)
140 		return retval;
141 
142 	if (val != 0 && val != 1)
143 		return -EINVAL;
144 
145 	if (oprofile_started)
146 		/*
147 		 * save to do without locking as we set
148 		 * hwsampler_running in start() when start_mutex is
149 		 * held
150 		 */
151 		return -EBUSY;
152 
153 	hwsampler_enabled = val;
154 
155 	return count;
156 }
157 
/*
 * Handlers for the hardware sampling enable flag; installed on
 * "0/enabled" and "hwsampling/hwsampler" by
 * oprofile_create_hwsampling_files().
 */
static const struct file_operations hwsampler_fops = {
	.read		= hwsampler_read,
	.write		= hwsampler_write,
};
162 
163 /*
164  * File ops used for:
165  * /dev/oprofile/0/count
166  * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
167  *
168  * Make sure that the value is within the hardware range.
169  */
170 
hw_interval_read(struct file * file,char __user * buf,size_t count,loff_t * offset)171 static ssize_t hw_interval_read(struct file *file, char __user *buf,
172 				size_t count, loff_t *offset)
173 {
174 	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
175 					count, offset);
176 }
177 
hw_interval_write(struct file * file,char const __user * buf,size_t count,loff_t * offset)178 static ssize_t hw_interval_write(struct file *file, char const __user *buf,
179 				 size_t count, loff_t *offset)
180 {
181 	unsigned long val;
182 	int retval;
183 
184 	if (*offset)
185 		return -EINVAL;
186 	retval = oprofilefs_ulong_from_user(&val, buf, count);
187 	if (retval <= 0)
188 		return retval;
189 	if (val < oprofile_min_interval)
190 		oprofile_hw_interval = oprofile_min_interval;
191 	else if (val > oprofile_max_interval)
192 		oprofile_hw_interval = oprofile_max_interval;
193 	else
194 		oprofile_hw_interval = val;
195 
196 	return count;
197 }
198 
/*
 * Handlers for the sampling interval; installed on "0/count" and
 * "hwsampling/hw_interval" by oprofile_create_hwsampling_files().
 */
static const struct file_operations hw_interval_fops = {
	.read		= hw_interval_read,
	.write		= hw_interval_write,
};
203 
204 /*
205  * File ops used for:
206  * /dev/oprofile/0/event
207  * Only a single event with number 0 is supported with this counter.
208  *
209  * /dev/oprofile/0/unit_mask
210  * This is a dummy file needed by the user space tools.
211  * No value other than 0 is accepted or returned.
212  */
213 
hwsampler_zero_read(struct file * file,char __user * buf,size_t count,loff_t * offset)214 static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
215 				    size_t count, loff_t *offset)
216 {
217 	return oprofilefs_ulong_to_user(0, buf, count, offset);
218 }
219 
hwsampler_zero_write(struct file * file,char const __user * buf,size_t count,loff_t * offset)220 static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
221 				     size_t count, loff_t *offset)
222 {
223 	unsigned long val;
224 	int retval;
225 
226 	if (*offset)
227 		return -EINVAL;
228 
229 	retval = oprofilefs_ulong_from_user(&val, buf, count);
230 	if (retval <= 0)
231 		return retval;
232 	if (val != 0)
233 		return -EINVAL;
234 	return count;
235 }
236 
/*
 * Handlers for files that only ever hold 0; installed on "0/event" and
 * "0/unit_mask" by oprofile_create_hwsampling_files().
 */
static const struct file_operations zero_fops = {
	.read		= hwsampler_zero_read,
	.write		= hwsampler_zero_write,
};
241 
242 /* /dev/oprofile/0/kernel file ops.  */
243 
hwsampler_kernel_read(struct file * file,char __user * buf,size_t count,loff_t * offset)244 static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
245 				     size_t count, loff_t *offset)
246 {
247 	return oprofilefs_ulong_to_user(counter_config.kernel,
248 					buf, count, offset);
249 }
250 
hwsampler_kernel_write(struct file * file,char const __user * buf,size_t count,loff_t * offset)251 static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
252 				      size_t count, loff_t *offset)
253 {
254 	unsigned long val;
255 	int retval;
256 
257 	if (*offset)
258 		return -EINVAL;
259 
260 	retval = oprofilefs_ulong_from_user(&val, buf, count);
261 	if (retval <= 0)
262 		return retval;
263 
264 	if (val != 0 && val != 1)
265 		return -EINVAL;
266 
267 	counter_config.kernel = val;
268 
269 	return count;
270 }
271 
/*
 * Handlers for counter_config.kernel; installed on "0/kernel" by
 * oprofile_create_hwsampling_files().
 */
static const struct file_operations kernel_fops = {
	.read		= hwsampler_kernel_read,
	.write		= hwsampler_kernel_write,
};
276 
277 /* /dev/oprofile/0/user file ops. */
278 
hwsampler_user_read(struct file * file,char __user * buf,size_t count,loff_t * offset)279 static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
280 				   size_t count, loff_t *offset)
281 {
282 	return oprofilefs_ulong_to_user(counter_config.user,
283 					buf, count, offset);
284 }
285 
hwsampler_user_write(struct file * file,char const __user * buf,size_t count,loff_t * offset)286 static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
287 				    size_t count, loff_t *offset)
288 {
289 	unsigned long val;
290 	int retval;
291 
292 	if (*offset)
293 		return -EINVAL;
294 
295 	retval = oprofilefs_ulong_from_user(&val, buf, count);
296 	if (retval <= 0)
297 		return retval;
298 
299 	if (val != 0 && val != 1)
300 		return -EINVAL;
301 
302 	counter_config.user = val;
303 
304 	return count;
305 }
306 
/*
 * Handlers for counter_config.user; installed on "0/user" by
 * oprofile_create_hwsampling_files().
 */
static const struct file_operations user_fops = {
	.read		= hwsampler_user_read,
	.write		= hwsampler_user_write,
};
311 
312 
313 /*
314  * File ops used for: /dev/oprofile/timer/enabled
315  * The value always has to be the inverted value of hwsampler_enabled. So
316  * no separate variable is created. That way we do not need locking.
317  */
318 
timer_enabled_read(struct file * file,char __user * buf,size_t count,loff_t * offset)319 static ssize_t timer_enabled_read(struct file *file, char __user *buf,
320 				  size_t count, loff_t *offset)
321 {
322 	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
323 }
324 
timer_enabled_write(struct file * file,char const __user * buf,size_t count,loff_t * offset)325 static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
326 				   size_t count, loff_t *offset)
327 {
328 	unsigned long val;
329 	int retval;
330 
331 	if (*offset)
332 		return -EINVAL;
333 
334 	retval = oprofilefs_ulong_from_user(&val, buf, count);
335 	if (retval <= 0)
336 		return retval;
337 
338 	if (val != 0 && val != 1)
339 		return -EINVAL;
340 
341 	/* Timer cannot be disabled without having hardware sampling.  */
342 	if (val == 0 && !hwsampler_available)
343 		return -EINVAL;
344 
345 	if (oprofile_started)
346 		/*
347 		 * save to do without locking as we set
348 		 * hwsampler_running in start() when start_mutex is
349 		 * held
350 		 */
351 		return -EBUSY;
352 
353 	hwsampler_enabled = !val;
354 
355 	return count;
356 }
357 
/*
 * Handlers for the timer mode switch; installed on "timer/enabled" by
 * oprofile_create_hwsampling_files().
 */
static const struct file_operations timer_enabled_fops = {
	.read		= timer_enabled_read,
	.write		= timer_enabled_write,
};
362 
363 
oprofile_create_hwsampling_files(struct dentry * root)364 static int oprofile_create_hwsampling_files(struct dentry *root)
365 {
366 	struct dentry *dir;
367 
368 	dir = oprofilefs_mkdir(root, "timer");
369 	if (!dir)
370 		return -EINVAL;
371 
372 	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
373 
374 	if (!hwsampler_available)
375 		return 0;
376 
377 	/* reinitialize default values */
378 	hwsampler_enabled = 1;
379 	counter_config.kernel = 1;
380 	counter_config.user = 1;
381 
382 	if (!force_cpu_type) {
383 		/*
384 		 * Create the counter file system.  A single virtual
385 		 * counter is created which can be used to
386 		 * enable/disable hardware sampling dynamically from
387 		 * user space.  The user space will configure a single
388 		 * counter with a single event.  The value of 'event'
389 		 * and 'unit_mask' are not evaluated by the kernel code
390 		 * and can only be set to 0.
391 		 */
392 
393 		dir = oprofilefs_mkdir(root, "0");
394 		if (!dir)
395 			return -EINVAL;
396 
397 		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
398 		oprofilefs_create_file(dir, "event", &zero_fops);
399 		oprofilefs_create_file(dir, "count", &hw_interval_fops);
400 		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
401 		oprofilefs_create_file(dir, "kernel", &kernel_fops);
402 		oprofilefs_create_file(dir, "user", &user_fops);
403 		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
404 					&oprofile_sdbt_blocks);
405 
406 	} else {
407 		/*
408 		 * Hardware sampling can be used but the cpu_type is
409 		 * forced to timer in order to deal with legacy user
410 		 * space tools.  The /dev/oprofile/hwsampling fs is
411 		 * provided in that case.
412 		 */
413 		dir = oprofilefs_mkdir(root, "hwsampling");
414 		if (!dir)
415 			return -EINVAL;
416 
417 		oprofilefs_create_file(dir, "hwsampler",
418 				       &hwsampler_fops);
419 		oprofilefs_create_file(dir, "hw_interval",
420 				       &hw_interval_fops);
421 		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
422 					   &oprofile_min_interval);
423 		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
424 					   &oprofile_max_interval);
425 		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
426 					&oprofile_sdbt_blocks);
427 	}
428 	return 0;
429 }
430 
oprofile_hwsampler_init(struct oprofile_operations * ops)431 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
432 {
433 	/*
434 	 * Initialize the timer mode infrastructure as well in order
435 	 * to be able to switch back dynamically.  oprofile_timer_init
436 	 * is not supposed to fail.
437 	 */
438 	if (oprofile_timer_init(ops))
439 		BUG();
440 
441 	memcpy(&timer_ops, ops, sizeof(timer_ops));
442 	ops->create_files = oprofile_create_hwsampling_files;
443 
444 	/*
445 	 * If the user space tools do not support newer cpu types,
446 	 * the force_cpu_type module parameter
447 	 * can be used to always return \"timer\" as cpu type.
448 	 */
449 	if (force_cpu_type != timer) {
450 		struct cpuid id;
451 
452 		get_cpu_id (&id);
453 
454 		switch (id.machine) {
455 		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
456 		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
457 		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
458 		default: return -ENODEV;
459 		}
460 	}
461 
462 	if (hwsampler_setup())
463 		return -ENODEV;
464 
465 	/*
466 	 * Query the range for the sampling interval from the
467 	 * hardware.
468 	 */
469 	oprofile_min_interval = hwsampler_query_min_interval();
470 	if (oprofile_min_interval == 0)
471 		return -ENODEV;
472 	oprofile_max_interval = hwsampler_query_max_interval();
473 	if (oprofile_max_interval == 0)
474 		return -ENODEV;
475 
476 	/* The initial value should be sane */
477 	if (oprofile_hw_interval < oprofile_min_interval)
478 		oprofile_hw_interval = oprofile_min_interval;
479 	if (oprofile_hw_interval > oprofile_max_interval)
480 		oprofile_hw_interval = oprofile_max_interval;
481 
482 	printk(KERN_INFO "oprofile: System z hardware sampling "
483 	       "facility found.\n");
484 
485 	ops->start = oprofile_hwsampler_start;
486 	ops->stop = oprofile_hwsampler_stop;
487 
488 	return 0;
489 }
490 
/* Tear down the hardware sampler; counterpart of oprofile_hwsampler_init(). */
static void oprofile_hwsampler_exit(void)
{
	hwsampler_shutdown();
}
495 
oprofile_arch_init(struct oprofile_operations * ops)496 int __init oprofile_arch_init(struct oprofile_operations *ops)
497 {
498 	ops->backtrace = s390_backtrace;
499 
500 	/*
501 	 * -ENODEV is not reported to the caller.  The module itself
502          * will use the timer mode sampling as fallback and this is
503          * always available.
504 	 */
505 	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
506 
507 	return 0;
508 }
509 
/* Architecture exit hook: release the hardware sampler. */
void oprofile_arch_exit(void)
{
	oprofile_hwsampler_exit();
}
514