1/* 2 * S390 Version 3 * Copyright IBM Corp. 2002, 2011 4 * Author(s): Thomas Spatzier (tspat@de.ibm.com) 5 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) 6 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) 7 * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) 8 * 9 * @remark Copyright 2002-2011 OProfile authors 10 */ 11 12#include <linux/oprofile.h> 13#include <linux/perf_event.h> 14#include <linux/init.h> 15#include <linux/errno.h> 16#include <linux/fs.h> 17#include <linux/module.h> 18#include <asm/processor.h> 19 20#include "../../../drivers/oprofile/oprof.h" 21 22extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); 23 24#include "hwsampler.h" 25#include "op_counter.h" 26 27#define DEFAULT_INTERVAL 4127518 28 29#define DEFAULT_SDBT_BLOCKS 1 30#define DEFAULT_SDB_BLOCKS 511 31 32static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; 33static unsigned long oprofile_min_interval; 34static unsigned long oprofile_max_interval; 35 36static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; 37static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; 38 39static int hwsampler_enabled; 40static int hwsampler_running; /* start_mutex must be held to change */ 41static int hwsampler_available; 42 43static struct oprofile_operations timer_ops; 44 45struct op_counter_config counter_config; 46 47enum __force_cpu_type { 48 reserved = 0, /* do not force */ 49 timer, 50}; 51static int force_cpu_type; 52 53static int set_cpu_type(const char *str, struct kernel_param *kp) 54{ 55 if (!strcmp(str, "timer")) { 56 force_cpu_type = timer; 57 printk(KERN_INFO "oprofile: forcing timer to be returned " 58 "as cpu type\n"); 59 } else { 60 force_cpu_type = 0; 61 } 62 63 return 0; 64} 65module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); 66MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" 67 "(report cpu_type \"timer\""); 68 69static int __oprofile_hwsampler_start(void) 70{ 71 int retval; 72 73 retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); 74 if (retval) 75 return retval; 76 77 retval = hwsampler_start_all(oprofile_hw_interval); 78 if (retval) 79 hwsampler_deallocate(); 80 81 return retval; 82} 83 84static int oprofile_hwsampler_start(void) 85{ 86 int retval; 87 88 hwsampler_running = hwsampler_enabled; 89 90 if (!hwsampler_running) 91 return timer_ops.start(); 92 93 retval = perf_reserve_sampling(); 94 if (retval) 95 return retval; 96 97 retval = __oprofile_hwsampler_start(); 98 if (retval) 99 perf_release_sampling(); 100 101 return retval; 102} 103 104static void oprofile_hwsampler_stop(void) 105{ 106 if (!hwsampler_running) { 107 timer_ops.stop(); 108 return; 109 } 110 111 hwsampler_stop_all(); 112 hwsampler_deallocate(); 113 perf_release_sampling(); 114 return; 115} 116 117/* 118 * File ops used for: 119 * /dev/oprofile/0/enabled 120 * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) 121 */ 122 123static ssize_t hwsampler_read(struct file *file, char __user *buf, 124 size_t count, loff_t *offset) 125{ 126 return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); 127} 128 129static ssize_t hwsampler_write(struct file *file, char const __user *buf, 130 size_t count, loff_t *offset) 131{ 132 unsigned long val; 133 int retval; 134 135 if (*offset) 136 return -EINVAL; 137 138 retval = oprofilefs_ulong_from_user(&val, buf, count); 139 if (retval <= 0) 140 return retval; 141 142 if (val != 0 && val != 1) 143 return -EINVAL; 144 145 if (oprofile_started) 146 /* 147 * save to do without locking as we set 148 * hwsampler_running in start() when start_mutex is 149 * held 150 */ 151 return -EBUSY; 152 153 hwsampler_enabled = val; 154 155 return count; 156} 157 158static const struct file_operations hwsampler_fops = { 159 .read = hwsampler_read, 160 .write = hwsampler_write, 161}; 162 163/* 164 * File ops used for: 165 * /dev/oprofile/0/count 166 * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) 167 * 168 * Make sure that the value is within the hardware range. 169 */ 170 171static ssize_t hw_interval_read(struct file *file, char __user *buf, 172 size_t count, loff_t *offset) 173{ 174 return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, 175 count, offset); 176} 177 178static ssize_t hw_interval_write(struct file *file, char const __user *buf, 179 size_t count, loff_t *offset) 180{ 181 unsigned long val; 182 int retval; 183 184 if (*offset) 185 return -EINVAL; 186 retval = oprofilefs_ulong_from_user(&val, buf, count); 187 if (retval <= 0) 188 return retval; 189 if (val < oprofile_min_interval) 190 oprofile_hw_interval = oprofile_min_interval; 191 else if (val > oprofile_max_interval) 192 oprofile_hw_interval = oprofile_max_interval; 193 else 194 oprofile_hw_interval = val; 195 196 return count; 197} 198 199static const struct file_operations hw_interval_fops = { 200 .read = hw_interval_read, 201 .write = hw_interval_write, 202}; 203 204/* 205 * File ops used for: 206 * /dev/oprofile/0/event 207 * Only a single event with number 0 is supported with this counter. 208 * 209 * /dev/oprofile/0/unit_mask 210 * This is a dummy file needed by the user space tools. 211 * No value other than 0 is accepted or returned. 212 */ 213 214static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, 215 size_t count, loff_t *offset) 216{ 217 return oprofilefs_ulong_to_user(0, buf, count, offset); 218} 219 220static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, 221 size_t count, loff_t *offset) 222{ 223 unsigned long val; 224 int retval; 225 226 if (*offset) 227 return -EINVAL; 228 229 retval = oprofilefs_ulong_from_user(&val, buf, count); 230 if (retval <= 0) 231 return retval; 232 if (val != 0) 233 return -EINVAL; 234 return count; 235} 236 237static const struct file_operations zero_fops = { 238 .read = hwsampler_zero_read, 239 .write = hwsampler_zero_write, 240}; 241 242/* /dev/oprofile/0/kernel file ops. */ 243 244static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, 245 size_t count, loff_t *offset) 246{ 247 return oprofilefs_ulong_to_user(counter_config.kernel, 248 buf, count, offset); 249} 250 251static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, 252 size_t count, loff_t *offset) 253{ 254 unsigned long val; 255 int retval; 256 257 if (*offset) 258 return -EINVAL; 259 260 retval = oprofilefs_ulong_from_user(&val, buf, count); 261 if (retval <= 0) 262 return retval; 263 264 if (val != 0 && val != 1) 265 return -EINVAL; 266 267 counter_config.kernel = val; 268 269 return count; 270} 271 272static const struct file_operations kernel_fops = { 273 .read = hwsampler_kernel_read, 274 .write = hwsampler_kernel_write, 275}; 276 277/* /dev/oprofile/0/user file ops. */ 278 279static ssize_t hwsampler_user_read(struct file *file, char __user *buf, 280 size_t count, loff_t *offset) 281{ 282 return oprofilefs_ulong_to_user(counter_config.user, 283 buf, count, offset); 284} 285 286static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, 287 size_t count, loff_t *offset) 288{ 289 unsigned long val; 290 int retval; 291 292 if (*offset) 293 return -EINVAL; 294 295 retval = oprofilefs_ulong_from_user(&val, buf, count); 296 if (retval <= 0) 297 return retval; 298 299 if (val != 0 && val != 1) 300 return -EINVAL; 301 302 counter_config.user = val; 303 304 return count; 305} 306 307static const struct file_operations user_fops = { 308 .read = hwsampler_user_read, 309 .write = hwsampler_user_write, 310}; 311 312 313/* 314 * File ops used for: /dev/oprofile/timer/enabled 315 * The value always has to be the inverted value of hwsampler_enabled. So 316 * no separate variable is created. That way we do not need locking. 317 */ 318 319static ssize_t timer_enabled_read(struct file *file, char __user *buf, 320 size_t count, loff_t *offset) 321{ 322 return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); 323} 324 325static ssize_t timer_enabled_write(struct file *file, char const __user *buf, 326 size_t count, loff_t *offset) 327{ 328 unsigned long val; 329 int retval; 330 331 if (*offset) 332 return -EINVAL; 333 334 retval = oprofilefs_ulong_from_user(&val, buf, count); 335 if (retval <= 0) 336 return retval; 337 338 if (val != 0 && val != 1) 339 return -EINVAL; 340 341 /* Timer cannot be disabled without having hardware sampling. */ 342 if (val == 0 && !hwsampler_available) 343 return -EINVAL; 344 345 if (oprofile_started) 346 /* 347 * save to do without locking as we set 348 * hwsampler_running in start() when start_mutex is 349 * held 350 */ 351 return -EBUSY; 352 353 hwsampler_enabled = !val; 354 355 return count; 356} 357 358static const struct file_operations timer_enabled_fops = { 359 .read = timer_enabled_read, 360 .write = timer_enabled_write, 361}; 362 363 364static int oprofile_create_hwsampling_files(struct dentry *root) 365{ 366 struct dentry *dir; 367 368 dir = oprofilefs_mkdir(root, "timer"); 369 if (!dir) 370 return -EINVAL; 371 372 oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); 373 374 if (!hwsampler_available) 375 return 0; 376 377 /* reinitialize default values */ 378 hwsampler_enabled = 1; 379 counter_config.kernel = 1; 380 counter_config.user = 1; 381 382 if (!force_cpu_type) { 383 /* 384 * Create the counter file system. A single virtual 385 * counter is created which can be used to 386 * enable/disable hardware sampling dynamically from 387 * user space. The user space will configure a single 388 * counter with a single event. The value of 'event' 389 * and 'unit_mask' are not evaluated by the kernel code 390 * and can only be set to 0. 391 */ 392 393 dir = oprofilefs_mkdir(root, "0"); 394 if (!dir) 395 return -EINVAL; 396 397 oprofilefs_create_file(dir, "enabled", &hwsampler_fops); 398 oprofilefs_create_file(dir, "event", &zero_fops); 399 oprofilefs_create_file(dir, "count", &hw_interval_fops); 400 oprofilefs_create_file(dir, "unit_mask", &zero_fops); 401 oprofilefs_create_file(dir, "kernel", &kernel_fops); 402 oprofilefs_create_file(dir, "user", &user_fops); 403 oprofilefs_create_ulong(dir, "hw_sdbt_blocks", 404 &oprofile_sdbt_blocks); 405 406 } else { 407 /* 408 * Hardware sampling can be used but the cpu_type is 409 * forced to timer in order to deal with legacy user 410 * space tools. The /dev/oprofile/hwsampling fs is 411 * provided in that case. 412 */ 413 dir = oprofilefs_mkdir(root, "hwsampling"); 414 if (!dir) 415 return -EINVAL; 416 417 oprofilefs_create_file(dir, "hwsampler", 418 &hwsampler_fops); 419 oprofilefs_create_file(dir, "hw_interval", 420 &hw_interval_fops); 421 oprofilefs_create_ro_ulong(dir, "hw_min_interval", 422 &oprofile_min_interval); 423 oprofilefs_create_ro_ulong(dir, "hw_max_interval", 424 &oprofile_max_interval); 425 oprofilefs_create_ulong(dir, "hw_sdbt_blocks", 426 &oprofile_sdbt_blocks); 427 } 428 return 0; 429} 430 431static int oprofile_hwsampler_init(struct oprofile_operations *ops) 432{ 433 /* 434 * Initialize the timer mode infrastructure as well in order 435 * to be able to switch back dynamically. oprofile_timer_init 436 * is not supposed to fail. 437 */ 438 if (oprofile_timer_init(ops)) 439 BUG(); 440 441 memcpy(&timer_ops, ops, sizeof(timer_ops)); 442 ops->create_files = oprofile_create_hwsampling_files; 443 444 /* 445 * If the user space tools do not support newer cpu types, 446 * the force_cpu_type module parameter 447 * can be used to always return \"timer\" as cpu type. 448 */ 449 if (force_cpu_type != timer) { 450 struct cpuid id; 451 452 get_cpu_id (&id); 453 454 switch (id.machine) { 455 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; 456 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; 457 case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; 458 default: return -ENODEV; 459 } 460 } 461 462 if (hwsampler_setup()) 463 return -ENODEV; 464 465 /* 466 * Query the range for the sampling interval from the 467 * hardware. 468 */ 469 oprofile_min_interval = hwsampler_query_min_interval(); 470 if (oprofile_min_interval == 0) 471 return -ENODEV; 472 oprofile_max_interval = hwsampler_query_max_interval(); 473 if (oprofile_max_interval == 0) 474 return -ENODEV; 475 476 /* The initial value should be sane */ 477 if (oprofile_hw_interval < oprofile_min_interval) 478 oprofile_hw_interval = oprofile_min_interval; 479 if (oprofile_hw_interval > oprofile_max_interval) 480 oprofile_hw_interval = oprofile_max_interval; 481 482 printk(KERN_INFO "oprofile: System z hardware sampling " 483 "facility found.\n"); 484 485 ops->start = oprofile_hwsampler_start; 486 ops->stop = oprofile_hwsampler_stop; 487 488 return 0; 489} 490 491static void oprofile_hwsampler_exit(void) 492{ 493 hwsampler_shutdown(); 494} 495 496int __init oprofile_arch_init(struct oprofile_operations *ops) 497{ 498 ops->backtrace = s390_backtrace; 499 500 /* 501 * -ENODEV is not reported to the caller. The module itself 502 * will use the timer mode sampling as fallback and this is 503 * always available. 504 */ 505 hwsampler_available = oprofile_hwsampler_init(ops) == 0; 506 507 return 0; 508} 509 510void oprofile_arch_exit(void) 511{ 512 oprofile_hwsampler_exit(); 513} 514