1/* 2 * kernel/time/timer_stats.c 3 * 4 * Collect timer usage statistics. 5 * 6 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar 7 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 8 * 9 * timer_stats is based on timer_top, a similar functionality which was part of 10 * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the 11 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based 12 * on dynamic allocation of the statistics entries and linear search based 13 * lookup combined with a global lock, rather than the static array, hash 14 * and per-CPU locking which is used by timer_stats. It was written for the 15 * pre hrtimer kernel code and therefore did not take hrtimers into account. 16 * Nevertheless it provided the base for the timer_stats implementation and 17 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks 18 * for this effort. 19 * 20 * timer_top.c is 21 * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus 22 * Written by Daniel Petrini <d.pensator@gmail.com> 23 * timer_top.c was released under the GNU General Public License version 2 24 * 25 * We export the addresses and counting of timer functions being called, 26 * the pid and cmdline from the owner process if applicable. 27 * 28 * Start/stop data collection: 29 * # echo [1|0] >/proc/timer_stats 30 * 31 * Display the information collected so far: 32 * # cat /proc/timer_stats 33 * 34 * This program is free software; you can redistribute it and/or modify 35 * it under the terms of the GNU General Public License version 2 as 36 * published by the Free Software Foundation. 37 */ 38 39#include <linux/proc_fs.h> 40#include <linux/module.h> 41#include <linux/spinlock.h> 42#include <linux/sched.h> 43#include <linux/seq_file.h> 44#include <linux/kallsyms.h> 45 46#include <asm/uaccess.h> 47 48/* 49 * This is our basic unit of interest: a timer expiry event identified 50 * by the timer, its start/expire functions and the PID of the task that 51 * started the timer. We count the number of times an event happens: 52 */ 53struct entry { 54 /* 55 * Hash list: 56 */ 57 struct entry *next; 58 59 /* 60 * Hash keys: 61 */ 62 void *timer; 63 void *start_func; 64 void *expire_func; 65 pid_t pid; 66 67 /* 68 * Number of timeout events: 69 */ 70 unsigned long count; 71 unsigned int timer_flag; 72 73 /* 74 * We save the command-line string to preserve 75 * this information past task exit: 76 */ 77 char comm[TASK_COMM_LEN + 1]; 78 79} ____cacheline_aligned_in_smp; 80 81/* 82 * Spinlock protecting the tables - not taken during lookup: 83 */ 84static DEFINE_RAW_SPINLOCK(table_lock); 85 86/* 87 * Per-CPU lookup locks for fast hash lookup: 88 */ 89static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); 90 91/* 92 * Mutex to serialize state changes with show-stats activities: 93 */ 94static DEFINE_MUTEX(show_mutex); 95 96/* 97 * Collection status, active/inactive: 98 */ 99int __read_mostly timer_stats_active; 100 101/* 102 * Beginning/end timestamps of measurement: 103 */ 104static ktime_t time_start, time_stop; 105 106/* 107 * tstat entry structs only get allocated while collection is 108 * active and never freed during that time - this simplifies 109 * things quite a bit. 110 * 111 * They get freed when a new collection period is started. 112 */ 113#define MAX_ENTRIES_BITS 10 114#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) 115 116static unsigned long nr_entries; 117static struct entry entries[MAX_ENTRIES]; 118 119static atomic_t overflow_count; 120 121/* 122 * The entries are in a hash-table, for fast lookup: 123 */ 124#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) 125#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) 126#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) 127 128#define __tstat_hashfn(entry) \ 129 (((unsigned long)(entry)->timer ^ \ 130 (unsigned long)(entry)->start_func ^ \ 131 (unsigned long)(entry)->expire_func ^ \ 132 (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) 133 134#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) 135 136static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; 137 138static void reset_entries(void) 139{ 140 nr_entries = 0; 141 memset(entries, 0, sizeof(entries)); 142 memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); 143 atomic_set(&overflow_count, 0); 144} 145 146static struct entry *alloc_entry(void) 147{ 148 if (nr_entries >= MAX_ENTRIES) 149 return NULL; 150 151 return entries + nr_entries++; 152} 153 154static int match_entries(struct entry *entry1, struct entry *entry2) 155{ 156 return entry1->timer == entry2->timer && 157 entry1->start_func == entry2->start_func && 158 entry1->expire_func == entry2->expire_func && 159 entry1->pid == entry2->pid; 160} 161 162/* 163 * Look up whether an entry matching this item is present 164 * in the hash already. Must be called with irqs off and the 165 * lookup lock held: 166 */ 167static struct entry *tstat_lookup(struct entry *entry, char *comm) 168{ 169 struct entry **head, *curr, *prev; 170 171 head = tstat_hashentry(entry); 172 curr = *head; 173 174 /* 175 * The fastpath is when the entry is already hashed, 176 * we do this with the lookup lock held, but with the 177 * table lock not held: 178 */ 179 while (curr) { 180 if (match_entries(curr, entry)) 181 return curr; 182 183 curr = curr->next; 184 } 185 /* 186 * Slowpath: allocate, set up and link a new hash entry: 187 */ 188 prev = NULL; 189 curr = *head; 190 191 raw_spin_lock(&table_lock); 192 /* 193 * Make sure we have not raced with another CPU: 194 */ 195 while (curr) { 196 if (match_entries(curr, entry)) 197 goto out_unlock; 198 199 prev = curr; 200 curr = curr->next; 201 } 202 203 curr = alloc_entry(); 204 if (curr) { 205 *curr = *entry; 206 curr->count = 0; 207 curr->next = NULL; 208 memcpy(curr->comm, comm, TASK_COMM_LEN); 209 210 smp_mb(); /* Ensure that curr is initialized before insert */ 211 212 if (prev) 213 prev->next = curr; 214 else 215 *head = curr; 216 } 217 out_unlock: 218 raw_spin_unlock(&table_lock); 219 220 return curr; 221} 222 223/** 224 * timer_stats_update_stats - Update the statistics for a timer. 225 * @timer: pointer to either a timer_list or a hrtimer 226 * @pid: the pid of the task which set up the timer 227 * @startf: pointer to the function which did the timer setup 228 * @timerf: pointer to the timer callback function of the timer 229 * @comm: name of the process which set up the timer 230 * 231 * When the timer is already registered, then the event counter is 232 * incremented. Otherwise the timer is registered in a free slot. 233 */ 234void timer_stats_update_stats(void *timer, pid_t pid, void *startf, 235 void *timerf, char *comm, 236 unsigned int timer_flag) 237{ 238 /* 239 * It doesn't matter which lock we take: 240 */ 241 raw_spinlock_t *lock; 242 struct entry *entry, input; 243 unsigned long flags; 244 245 if (likely(!timer_stats_active)) 246 return; 247 248 lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id()); 249 250 input.timer = timer; 251 input.start_func = startf; 252 input.expire_func = timerf; 253 input.pid = pid; 254 input.timer_flag = timer_flag; 255 256 raw_spin_lock_irqsave(lock, flags); 257 if (!timer_stats_active) 258 goto out_unlock; 259 260 entry = tstat_lookup(&input, comm); 261 if (likely(entry)) 262 entry->count++; 263 else 264 atomic_inc(&overflow_count); 265 266 out_unlock: 267 raw_spin_unlock_irqrestore(lock, flags); 268} 269 270static void print_name_offset(struct seq_file *m, unsigned long addr) 271{ 272 char symname[KSYM_NAME_LEN]; 273 274 if (lookup_symbol_name(addr, symname) < 0) 275 seq_printf(m, "<%p>", (void *)addr); 276 else 277 seq_printf(m, "%s", symname); 278} 279 280static int tstats_show(struct seq_file *m, void *v) 281{ 282 struct timespec period; 283 struct entry *entry; 284 unsigned long ms; 285 long events = 0; 286 ktime_t time; 287 int i; 288 289 mutex_lock(&show_mutex); 290 /* 291 * If still active then calculate up to now: 292 */ 293 if (timer_stats_active) 294 time_stop = ktime_get(); 295 296 time = ktime_sub(time_stop, time_start); 297 298 period = ktime_to_timespec(time); 299 ms = period.tv_nsec / 1000000; 300 301 seq_puts(m, "Timer Stats Version: v0.3\n"); 302 seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); 303 if (atomic_read(&overflow_count)) 304 seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count)); 305 seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive"); 306 307 for (i = 0; i < nr_entries; i++) { 308 entry = entries + i; 309 if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { 310 seq_printf(m, "%4luD, %5d %-16s ", 311 entry->count, entry->pid, entry->comm); 312 } else { 313 seq_printf(m, " %4lu, %5d %-16s ", 314 entry->count, entry->pid, entry->comm); 315 } 316 317 print_name_offset(m, (unsigned long)entry->start_func); 318 seq_puts(m, " ("); 319 print_name_offset(m, (unsigned long)entry->expire_func); 320 seq_puts(m, ")\n"); 321 322 events += entry->count; 323 } 324 325 ms += period.tv_sec * 1000; 326 if (!ms) 327 ms = 1; 328 329 if (events && period.tv_sec) 330 seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", 331 events, events * 1000 / ms, 332 (events * 1000000 / ms) % 1000); 333 else 334 seq_printf(m, "%ld total events\n", events); 335 336 mutex_unlock(&show_mutex); 337 338 return 0; 339} 340 341/* 342 * After a state change, make sure all concurrent lookup/update 343 * activities have stopped: 344 */ 345static void sync_access(void) 346{ 347 unsigned long flags; 348 int cpu; 349 350 for_each_online_cpu(cpu) { 351 raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); 352 353 raw_spin_lock_irqsave(lock, flags); 354 /* nothing */ 355 raw_spin_unlock_irqrestore(lock, flags); 356 } 357} 358 359static ssize_t tstats_write(struct file *file, const char __user *buf, 360 size_t count, loff_t *offs) 361{ 362 char ctl[2]; 363 364 if (count != 2 || *offs) 365 return -EINVAL; 366 367 if (copy_from_user(ctl, buf, count)) 368 return -EFAULT; 369 370 mutex_lock(&show_mutex); 371 switch (ctl[0]) { 372 case '0': 373 if (timer_stats_active) { 374 timer_stats_active = 0; 375 time_stop = ktime_get(); 376 sync_access(); 377 } 378 break; 379 case '1': 380 if (!timer_stats_active) { 381 reset_entries(); 382 time_start = ktime_get(); 383 smp_mb(); 384 timer_stats_active = 1; 385 } 386 break; 387 default: 388 count = -EINVAL; 389 } 390 mutex_unlock(&show_mutex); 391 392 return count; 393} 394 395static int tstats_open(struct inode *inode, struct file *filp) 396{ 397 return single_open(filp, tstats_show, NULL); 398} 399 400static const struct file_operations tstats_fops = { 401 .open = tstats_open, 402 .read = seq_read, 403 .write = tstats_write, 404 .llseek = seq_lseek, 405 .release = single_release, 406}; 407 408void __init init_timer_stats(void) 409{ 410 int cpu; 411 412 for_each_possible_cpu(cpu) 413 raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); 414} 415 416static int __init init_tstats_procfs(void) 417{ 418 struct proc_dir_entry *pe; 419 420 pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); 421 if (!pe) 422 return -ENOMEM; 423 return 0; 424} 425__initcall(init_tstats_procfs); 426