root/tools/perf/builtin-annotate.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. process_basic_block
  2. process_branch_stack
  3. hist_iter__branch_callback
  4. process_branch_callback
  5. has_annotation
  6. perf_evsel__add_sample
  7. process_sample_event
  8. process_feature_event
  9. hist_entry__tty_annotate
  10. hists__find_annotations
  11. __cmd_annotate
  12. cmd_annotate

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * builtin-annotate.c
   4  *
   5  * Builtin annotate command: Analyze the perf.data input file,
   6  * look up and read DSOs and symbol information and display
   7  * a histogram of results, along various sorting keys.
   8  */
   9 #include "builtin.h"
  10 
  11 #include "util/color.h"
  12 #include <linux/list.h>
  13 #include "util/cache.h"
  14 #include <linux/rbtree.h>
  15 #include <linux/zalloc.h>
  16 #include "util/symbol.h"
  17 
  18 #include "perf.h"
  19 #include "util/debug.h"
  20 
  21 #include "util/evlist.h"
  22 #include "util/evsel.h"
  23 #include "util/annotate.h"
  24 #include "util/event.h"
  25 #include <subcmd/parse-options.h>
  26 #include "util/parse-events.h"
  27 #include "util/sort.h"
  28 #include "util/hist.h"
  29 #include "util/dso.h"
  30 #include "util/machine.h"
  31 #include "util/map.h"
  32 #include "util/session.h"
  33 #include "util/tool.h"
  34 #include "util/data.h"
  35 #include "arch/common.h"
  36 #include "util/block-range.h"
  37 #include "util/map_symbol.h"
  38 #include "util/branch.h"
  39 
  40 #include <dlfcn.h>
  41 #include <errno.h>
  42 #include <linux/bitmap.h>
  43 #include <linux/err.h>
  44 
  45 struct perf_annotate {
  46         struct perf_tool tool;
  47         struct perf_session *session;
  48         struct annotation_options opts;
  49         bool       use_tui, use_stdio, use_stdio2, use_gtk;
  50         bool       skip_missing;
  51         bool       has_br_stack;
  52         bool       group_set;
  53         const char *sym_hist_filter;
  54         const char *cpu_list;
  55         DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
  56 };
  57 
  58 /*
  59  * Given one basic block:
  60  *
  61  *      from    to              branch_i
  62  *      * ----> *
  63  *              |
  64  *              | block
  65  *              v
  66  *              * ----> *
  67  *              from    to      branch_i+1
  68  *
  69  * where the horizontal are the branches and the vertical is the executed
  70  * block of instructions.
  71  *
  72  * We count, for each 'instruction', the number of blocks that covered it as
  73  * well as count the ratio each branch is taken.
  74  *
  75  * We can do this without knowing the actual instruction stream by keeping
  76  * track of the address ranges. We break down ranges such that there is no
  77  * overlap and iterate from the start until the end.
  78  *
  79  * @acme: once we parse the objdump output _before_ processing the samples,
  80  * we can easily fold the branch.cycles IPC bits in.
  81  */
  82 static void process_basic_block(struct addr_map_symbol *start,
  83                                 struct addr_map_symbol *end,
  84                                 struct branch_flags *flags)
  85 {
  86         struct symbol *sym = start->sym;
  87         struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
  88         struct block_range_iter iter;
  89         struct block_range *entry;
  90 
  91         /*
  92          * Sanity; NULL isn't executable and the CPU cannot execute backwards
  93          */
  94         if (!start->addr || start->addr > end->addr)
  95                 return;
  96 
  97         iter = block_range__create(start->addr, end->addr);
  98         if (!block_range_iter__valid(&iter))
  99                 return;
 100 
 101         /*
 102          * First block in range is a branch target.
 103          */
 104         entry = block_range_iter(&iter);
 105         assert(entry->is_target);
 106         entry->entry++;
 107 
 108         do {
 109                 entry = block_range_iter(&iter);
 110 
 111                 entry->coverage++;
 112                 entry->sym = sym;
 113 
 114                 if (notes)
 115                         notes->max_coverage = max(notes->max_coverage, entry->coverage);
 116 
 117         } while (block_range_iter__next(&iter));
 118 
 119         /*
 120          * Last block in rage is a branch.
 121          */
 122         entry = block_range_iter(&iter);
 123         assert(entry->is_branch);
 124         entry->taken++;
 125         if (flags->predicted)
 126                 entry->pred++;
 127 }
 128 
 129 static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
 130                                  struct perf_sample *sample)
 131 {
 132         struct addr_map_symbol *prev = NULL;
 133         struct branch_info *bi;
 134         int i;
 135 
 136         if (!bs || !bs->nr)
 137                 return;
 138 
 139         bi = sample__resolve_bstack(sample, al);
 140         if (!bi)
 141                 return;
 142 
 143         for (i = bs->nr - 1; i >= 0; i--) {
 144                 /*
 145                  * XXX filter against symbol
 146                  */
 147                 if (prev)
 148                         process_basic_block(prev, &bi[i].from, &bi[i].flags);
 149                 prev = &bi[i].to;
 150         }
 151 
 152         free(bi);
 153 }
 154 
 155 static int hist_iter__branch_callback(struct hist_entry_iter *iter,
 156                                       struct addr_location *al __maybe_unused,
 157                                       bool single __maybe_unused,
 158                                       void *arg __maybe_unused)
 159 {
 160         struct hist_entry *he = iter->he;
 161         struct branch_info *bi;
 162         struct perf_sample *sample = iter->sample;
 163         struct evsel *evsel = iter->evsel;
 164         int err;
 165 
 166         bi = he->branch_info;
 167         err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
 168 
 169         if (err)
 170                 goto out;
 171 
 172         err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
 173 
 174 out:
 175         return err;
 176 }
 177 
 178 static int process_branch_callback(struct evsel *evsel,
 179                                    struct perf_sample *sample,
 180                                    struct addr_location *al __maybe_unused,
 181                                    struct perf_annotate *ann,
 182                                    struct machine *machine)
 183 {
 184         struct hist_entry_iter iter = {
 185                 .evsel          = evsel,
 186                 .sample         = sample,
 187                 .add_entry_cb   = hist_iter__branch_callback,
 188                 .hide_unresolved        = symbol_conf.hide_unresolved,
 189                 .ops            = &hist_iter_branch,
 190         };
 191 
 192         struct addr_location a;
 193         int ret;
 194 
 195         if (machine__resolve(machine, &a, sample) < 0)
 196                 return -1;
 197 
 198         if (a.sym == NULL)
 199                 return 0;
 200 
 201         if (a.map != NULL)
 202                 a.map->dso->hit = 1;
 203 
 204         hist__account_cycles(sample->branch_stack, al, sample, false);
 205 
 206         ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
 207         return ret;
 208 }
 209 
 210 static bool has_annotation(struct perf_annotate *ann)
 211 {
 212         return ui__has_annotation() || ann->use_stdio2;
 213 }
 214 
 215 static int perf_evsel__add_sample(struct evsel *evsel,
 216                                   struct perf_sample *sample,
 217                                   struct addr_location *al,
 218                                   struct perf_annotate *ann,
 219                                   struct machine *machine)
 220 {
 221         struct hists *hists = evsel__hists(evsel);
 222         struct hist_entry *he;
 223         int ret;
 224 
 225         if ((!ann->has_br_stack || !has_annotation(ann)) &&
 226             ann->sym_hist_filter != NULL &&
 227             (al->sym == NULL ||
 228              strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
 229                 /* We're only interested in a symbol named sym_hist_filter */
 230                 /*
 231                  * FIXME: why isn't this done in the symbol_filter when loading
 232                  * the DSO?
 233                  */
 234                 if (al->sym != NULL) {
 235                         rb_erase_cached(&al->sym->rb_node,
 236                                  &al->map->dso->symbols);
 237                         symbol__delete(al->sym);
 238                         dso__reset_find_symbol_cache(al->map->dso);
 239                 }
 240                 return 0;
 241         }
 242 
 243         /*
 244          * XXX filtered samples can still have branch entires pointing into our
 245          * symbol and are missed.
 246          */
 247         process_branch_stack(sample->branch_stack, al, sample);
 248 
 249         if (ann->has_br_stack && has_annotation(ann))
 250                 return process_branch_callback(evsel, sample, al, ann, machine);
 251 
 252         he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
 253         if (he == NULL)
 254                 return -ENOMEM;
 255 
 256         ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
 257         hists__inc_nr_samples(hists, true);
 258         return ret;
 259 }
 260 
 261 static int process_sample_event(struct perf_tool *tool,
 262                                 union perf_event *event,
 263                                 struct perf_sample *sample,
 264                                 struct evsel *evsel,
 265                                 struct machine *machine)
 266 {
 267         struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
 268         struct addr_location al;
 269         int ret = 0;
 270 
 271         if (machine__resolve(machine, &al, sample) < 0) {
 272                 pr_warning("problem processing %d event, skipping it.\n",
 273                            event->header.type);
 274                 return -1;
 275         }
 276 
 277         if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
 278                 goto out_put;
 279 
 280         if (!al.filtered &&
 281             perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
 282                 pr_warning("problem incrementing symbol count, "
 283                            "skipping event\n");
 284                 ret = -1;
 285         }
 286 out_put:
 287         addr_location__put(&al);
 288         return ret;
 289 }
 290 
 291 static int process_feature_event(struct perf_session *session,
 292                                  union perf_event *event)
 293 {
 294         if (event->feat.feat_id < HEADER_LAST_FEATURE)
 295                 return perf_event__process_feature(session, event);
 296         return 0;
 297 }
 298 
 299 static int hist_entry__tty_annotate(struct hist_entry *he,
 300                                     struct evsel *evsel,
 301                                     struct perf_annotate *ann)
 302 {
 303         if (!ann->use_stdio2)
 304                 return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts);
 305 
 306         return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts);
 307 }
 308 
 309 static void hists__find_annotations(struct hists *hists,
 310                                     struct evsel *evsel,
 311                                     struct perf_annotate *ann)
 312 {
 313         struct rb_node *nd = rb_first_cached(&hists->entries), *next;
 314         int key = K_RIGHT;
 315 
 316         while (nd) {
 317                 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
 318                 struct annotation *notes;
 319 
 320                 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
 321                         goto find_next;
 322 
 323                 if (ann->sym_hist_filter &&
 324                     (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
 325                         goto find_next;
 326 
 327                 notes = symbol__annotation(he->ms.sym);
 328                 if (notes->src == NULL) {
 329 find_next:
 330                         if (key == K_LEFT)
 331                                 nd = rb_prev(nd);
 332                         else
 333                                 nd = rb_next(nd);
 334                         continue;
 335                 }
 336 
 337                 if (use_browser == 2) {
 338                         int ret;
 339                         int (*annotate)(struct hist_entry *he,
 340                                         struct evsel *evsel,
 341                                         struct hist_browser_timer *hbt);
 342 
 343                         annotate = dlsym(perf_gtk_handle,
 344                                          "hist_entry__gtk_annotate");
 345                         if (annotate == NULL) {
 346                                 ui__error("GTK browser not found!\n");
 347                                 return;
 348                         }
 349 
 350                         ret = annotate(he, evsel, NULL);
 351                         if (!ret || !ann->skip_missing)
 352                                 return;
 353 
 354                         /* skip missing symbols */
 355                         nd = rb_next(nd);
 356                 } else if (use_browser == 1) {
 357                         key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
 358 
 359                         switch (key) {
 360                         case -1:
 361                                 if (!ann->skip_missing)
 362                                         return;
 363                                 /* fall through */
 364                         case K_RIGHT:
 365                                 next = rb_next(nd);
 366                                 break;
 367                         case K_LEFT:
 368                                 next = rb_prev(nd);
 369                                 break;
 370                         default:
 371                                 return;
 372                         }
 373 
 374                         if (next != NULL)
 375                                 nd = next;
 376                 } else {
 377                         hist_entry__tty_annotate(he, evsel, ann);
 378                         nd = rb_next(nd);
 379                         /*
 380                          * Since we have a hist_entry per IP for the same
 381                          * symbol, free he->ms.sym->src to signal we already
 382                          * processed this symbol.
 383                          */
 384                         zfree(&notes->src->cycles_hist);
 385                         zfree(&notes->src);
 386                 }
 387         }
 388 }
 389 
 390 static int __cmd_annotate(struct perf_annotate *ann)
 391 {
 392         int ret;
 393         struct perf_session *session = ann->session;
 394         struct evsel *pos;
 395         u64 total_nr_samples;
 396 
 397         if (ann->cpu_list) {
 398                 ret = perf_session__cpu_bitmap(session, ann->cpu_list,
 399                                                ann->cpu_bitmap);
 400                 if (ret)
 401                         goto out;
 402         }
 403 
 404         if (!ann->opts.objdump_path) {
 405                 ret = perf_env__lookup_objdump(&session->header.env,
 406                                                &ann->opts.objdump_path);
 407                 if (ret)
 408                         goto out;
 409         }
 410 
 411         ret = perf_session__process_events(session);
 412         if (ret)
 413                 goto out;
 414 
 415         if (dump_trace) {
 416                 perf_session__fprintf_nr_events(session, stdout);
 417                 perf_evlist__fprintf_nr_events(session->evlist, stdout);
 418                 goto out;
 419         }
 420 
 421         if (verbose > 3)
 422                 perf_session__fprintf(session, stdout);
 423 
 424         if (verbose > 2)
 425                 perf_session__fprintf_dsos(session, stdout);
 426 
 427         total_nr_samples = 0;
 428         evlist__for_each_entry(session->evlist, pos) {
 429                 struct hists *hists = evsel__hists(pos);
 430                 u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
 431 
 432                 if (nr_samples > 0) {
 433                         total_nr_samples += nr_samples;
 434                         hists__collapse_resort(hists, NULL);
 435                         /* Don't sort callchain */
 436                         perf_evsel__reset_sample_bit(pos, CALLCHAIN);
 437                         perf_evsel__output_resort(pos, NULL);
 438 
 439                         if (symbol_conf.event_group &&
 440                             !perf_evsel__is_group_leader(pos))
 441                                 continue;
 442 
 443                         hists__find_annotations(hists, pos, ann);
 444                 }
 445         }
 446 
 447         if (total_nr_samples == 0) {
 448                 ui__error("The %s data has no samples!\n", session->data->path);
 449                 goto out;
 450         }
 451 
 452         if (use_browser == 2) {
 453                 void (*show_annotations)(void);
 454 
 455                 show_annotations = dlsym(perf_gtk_handle,
 456                                          "perf_gtk__show_annotations");
 457                 if (show_annotations == NULL) {
 458                         ui__error("GTK browser not found!\n");
 459                         goto out;
 460                 }
 461                 show_annotations();
 462         }
 463 
 464 out:
 465         return ret;
 466 }
 467 
 468 static const char * const annotate_usage[] = {
 469         "perf annotate [<options>]",
 470         NULL
 471 };
 472 
 473 int cmd_annotate(int argc, const char **argv)
 474 {
 475         struct perf_annotate annotate = {
 476                 .tool = {
 477                         .sample = process_sample_event,
 478                         .mmap   = perf_event__process_mmap,
 479                         .mmap2  = perf_event__process_mmap2,
 480                         .comm   = perf_event__process_comm,
 481                         .exit   = perf_event__process_exit,
 482                         .fork   = perf_event__process_fork,
 483                         .namespaces = perf_event__process_namespaces,
 484                         .attr   = perf_event__process_attr,
 485                         .build_id = perf_event__process_build_id,
 486                         .tracing_data   = perf_event__process_tracing_data,
 487                         .feature        = process_feature_event,
 488                         .ordered_events = true,
 489                         .ordering_requires_timestamps = true,
 490                 },
 491                 .opts = annotation__default_options,
 492         };
 493         struct perf_data data = {
 494                 .mode  = PERF_DATA_MODE_READ,
 495         };
 496         struct option options[] = {
 497         OPT_STRING('i', "input", &input_name, "file",
 498                     "input file name"),
 499         OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 500                    "only consider symbols in these dsos"),
 501         OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
 502                     "symbol to annotate"),
 503         OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
 504         OPT_INCR('v', "verbose", &verbose,
 505                     "be more verbose (show symbol address, etc)"),
 506         OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
 507         OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 508                     "dump raw trace in ASCII"),
 509         OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
 510         OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
 511         OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
 512         OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"),
 513         OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
 514                     "don't load vmlinux even if found"),
 515         OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 516                    "file", "vmlinux pathname"),
 517         OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 518                     "load module symbols - WARNING: use only with -k and LIVE kernel"),
 519         OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
 520                     "print matching source lines (may be slow)"),
 521         OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
 522                     "Don't shorten the displayed pathnames"),
 523         OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
 524                     "Skip symbols that cannot be annotated"),
 525         OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
 526                         &annotate.group_set,
 527                         "Show event group information together"),
 528         OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
 529         OPT_CALLBACK(0, "symfs", NULL, "directory",
 530                      "Look for files with symbols relative to this directory",
 531                      symbol__config_symfs),
 532         OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
 533                     "Interleave source code with assembly code (default)"),
 534         OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
 535                     "Display raw encoding of assembly instructions (default)"),
 536         OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
 537                    "Specify disassembler style (e.g. -M intel for intel syntax)"),
 538         OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
 539                    "objdump binary to use for disassembly and annotations"),
 540         OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 541                     "Show event group information together"),
 542         OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 543                     "Show a column with the sum of periods"),
 544         OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 545                     "Show a column with the number of samples"),
 546         OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
 547                              "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
 548                              stdio__config_color, "always"),
 549         OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
 550                      "Set percent type local/global-period/hits",
 551                      annotate_parse_percent_type),
 552 
 553         OPT_END()
 554         };
 555         int ret;
 556 
 557         set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
 558         set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);
 559 
 560 
 561         ret = hists__init();
 562         if (ret < 0)
 563                 return ret;
 564 
 565         argc = parse_options(argc, argv, options, annotate_usage, 0);
 566         if (argc) {
 567                 /*
 568                  * Special case: if there's an argument left then assume that
 569                  * it's a symbol filter:
 570                  */
 571                 if (argc > 1)
 572                         usage_with_options(annotate_usage, options);
 573 
 574                 annotate.sym_hist_filter = argv[0];
 575         }
 576 
 577         if (symbol_conf.show_nr_samples && annotate.use_gtk) {
 578                 pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
 579                 return ret;
 580         }
 581 
 582         if (quiet)
 583                 perf_quiet_option();
 584 
 585         data.path = input_name;
 586 
 587         annotate.session = perf_session__new(&data, false, &annotate.tool);
 588         if (IS_ERR(annotate.session))
 589                 return PTR_ERR(annotate.session);
 590 
 591         annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
 592                                                       HEADER_BRANCH_STACK);
 593 
 594         if (annotate.group_set)
 595                 perf_evlist__force_leader(annotate.session->evlist);
 596 
 597         ret = symbol__annotation_init();
 598         if (ret < 0)
 599                 goto out_delete;
 600 
 601         annotation_config__init();
 602 
 603         symbol_conf.try_vmlinux_path = true;
 604 
 605         ret = symbol__init(&annotate.session->header.env);
 606         if (ret < 0)
 607                 goto out_delete;
 608 
 609         if (annotate.use_stdio || annotate.use_stdio2)
 610                 use_browser = 0;
 611         else if (annotate.use_tui)
 612                 use_browser = 1;
 613         else if (annotate.use_gtk)
 614                 use_browser = 2;
 615 
 616         setup_browser(true);
 617 
 618         if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) {
 619                 sort__mode = SORT_MODE__BRANCH;
 620                 if (setup_sorting(annotate.session->evlist) < 0)
 621                         usage_with_options(annotate_usage, options);
 622         } else {
 623                 if (setup_sorting(NULL) < 0)
 624                         usage_with_options(annotate_usage, options);
 625         }
 626 
 627         ret = __cmd_annotate(&annotate);
 628 
 629 out_delete:
 630         /*
 631          * Speed up the exit process, for large files this can
 632          * take quite a while.
 633          *
 634          * XXX Enable this when using valgrind or if we ever
 635          * librarize this command.
 636          *
 637          * Also experiment with obstacks to see how much speed
 638          * up we'll get here.
 639          *
 640          * perf_session__delete(session);
 641          */
 642         return ret;
 643 }

/* [<][>][^][v][top][bottom][index][help] */