#include "builtin.h"
#include "perf.h"

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"

#include "util/debug.h"

#include <linux/rbtree.h>
#include <linux/string.h>
#include <locale.h>
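/*
 * perf kmem - trace and analyze kernel memory (SLAB and page allocator)
 * events.  Typical usage, per the subcommands and options set up in
 * cmd_kmem() below:
 *
 *   perf kmem [<options>] record [<command>]
 *   perf kmem [<options>] stat
 */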

static int	kmem_slab;
static int	kmem_page;

static long	kmem_page_size;

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static int			alloc_flag;
static int			caller_flag;

static int			alloc_lines = -1;
static int			caller_lines = -1;

static bool			raw_ip;

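/*
 * Per-pointer (or per-callsite) slab allocation statistics, kept in the
 * rbtrees below.  The same structure serves both trees; only the key
 * differs.
 */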
struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

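/*
 * Account one slab allocation in the tree keyed by pointer.  An existing
 * entry is updated in place; call_site and alloc_cpu always reflect the
 * most recent allocation returning this pointer.
 */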
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	return 0;
}

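/* Same accounting as above, but keyed by allocation call site. */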
static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
		      call_site = perf_evsel__intval(evsel, sample, "call_site");
	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
	    bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	nr_allocs++;
	return 0;
}

static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	int ret = perf_evsel__process_alloc_event(evsel, sample);

	if (!ret) {
		int node1 = cpu__get_node(sample->cpu),
		    node2 = perf_evsel__intval(evsel, sample, "node");

		if (node1 != node2)
			nr_cross_allocs++;
	}

	return ret;
}

static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);

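/*
 * Look up an entry using the given sort callback as comparator; the key
 * carries whichever field the comparator actually inspects.
 */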
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

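/*
 * Process a kfree/kmem_cache_free event: a free on a different CPU than
 * the matching allocation counts as a "ping-pong" against both the
 * pointer and its call site.  alloc_cpu is then reset to -1 to mark the
 * pointer as freed.
 */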
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat, callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

static u64 total_page_alloc_bytes;
static u64 total_page_free_bytes;
static u64 total_page_nomatch_bytes;
static u64 total_page_fail_bytes;
static unsigned long nr_page_allocs;
static unsigned long nr_page_frees;
static unsigned long nr_page_fails;
static unsigned long nr_page_nomatch;

static bool use_pfn;

#define MAX_MIGRATE_TYPES  6
#define MAX_PAGE_ORDER     11

static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];

struct page_stat {
	struct rb_node	node;
	u64		page;
	int		order;
	unsigned	gfp_flags;
	unsigned	migrate_type;
	u64		alloc_bytes;
	u64		free_bytes;
	int		nr_alloc;
	int		nr_free;
};

static struct rb_root page_tree;
static struct rb_root page_alloc_tree;
static struct rb_root page_alloc_sorted;

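/*
 * page_tree tracks currently-live pages, one node per outstanding
 * allocation, so that the free event can recover the order, GFP flags
 * and migrate type recorded at allocation time.
 */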
static struct page_stat *search_page(unsigned long page, bool create)
{
	struct rb_node **node = &page_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;

	while (*node) {
		s64 cmp;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		cmp = data->page - page;
		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->page = page;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_tree);
	}

	return data;
}

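/*
 * Composite key for page_alloc_tree: page, then order, migrate type and
 * GFP flags.  Note the inverted sign convention; it is consistent with
 * its single user, search_page_alloc_stat().
 */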
static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
{
	if (a->page > b->page)
		return -1;
	if (a->page < b->page)
		return 1;
	if (a->order > b->order)
		return -1;
	if (a->order < b->order)
		return 1;
	if (a->migrate_type > b->migrate_type)
		return -1;
	if (a->migrate_type < b->migrate_type)
		return 1;
	if (a->gfp_flags > b->gfp_flags)
		return -1;
	if (a->gfp_flags < b->gfp_flags)
		return 1;
	return 0;
}

static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
{
	struct rb_node **node = &page_alloc_tree.rb_node;
	struct rb_node *parent = NULL;
	struct page_stat *data;

	while (*node) {
		s64 cmp;

		parent = *node;
		data = rb_entry(*node, struct page_stat, node);

		cmp = page_stat_cmp(data, pstat);
		if (cmp < 0)
			node = &parent->rb_left;
		else if (cmp > 0)
			node = &parent->rb_right;
		else
			return data;
	}

	if (!create)
		return NULL;

	data = zalloc(sizeof(*data));
	if (data != NULL) {
		data->page = pstat->page;
		data->order = pstat->order;
		data->gfp_flags = pstat->gfp_flags;
		data->migrate_type = pstat->migrate_type;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &page_alloc_tree);
	}

	return data;
}

static bool valid_page(u64 pfn_or_page)
{
	/* use -1ULL, not -1UL, so the check also works on 32-bit builds */
	if (use_pfn && pfn_or_page == -1ULL)
		return false;
	if (!use_pfn && pfn_or_page == 0)
		return false;
	return true;
}

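/*
 * Process a mm_page_alloc event: update the global counters, remember
 * the page in page_tree for matching at free time, and account it in
 * the per-(page, order, gfp, migratetype) stats tree.
 */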
static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	u64 page;
	unsigned int order = perf_evsel__intval(evsel, sample, "order");
	unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
						       "migratetype");
	u64 bytes = kmem_page_size << order;
	struct page_stat *pstat;
	struct page_stat this = {
		.order = order,
		.gfp_flags = gfp_flags,
		.migrate_type = migrate_type,
	};

	if (use_pfn)
		page = perf_evsel__intval(evsel, sample, "pfn");
	else
		page = perf_evsel__intval(evsel, sample, "page");

	nr_page_allocs++;
	total_page_alloc_bytes += bytes;

	if (!valid_page(page)) {
		nr_page_fails++;
		total_page_fail_bytes += bytes;

		return 0;
	}

	/*
	 * This is to find the current page (with correct gfp flags and
	 * migrate type) at free event.
	 */
	pstat = search_page(page, true);
	if (pstat == NULL)
		return -ENOMEM;

	pstat->order = order;
	pstat->gfp_flags = gfp_flags;
	pstat->migrate_type = migrate_type;

	this.page = page;
	pstat = search_page_alloc_stat(&this, true);
	if (pstat == NULL)
		return -ENOMEM;

	pstat->nr_alloc++;
	pstat->alloc_bytes += bytes;

	order_stats[order][migrate_type]++;

	return 0;
}

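/*
 * Process a mm_page_free event.  Frees with no matching allocation
 * (e.g. the page was allocated before tracing started) are counted as
 * "nomatch" and otherwise ignored.
 */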
static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	u64 page;
	unsigned int order = perf_evsel__intval(evsel, sample, "order");
	u64 bytes = kmem_page_size << order;
	struct page_stat *pstat;
	struct page_stat this = {
		.order = order,
	};

	if (use_pfn)
		page = perf_evsel__intval(evsel, sample, "pfn");
	else
		page = perf_evsel__intval(evsel, sample, "page");

	nr_page_frees++;
	total_page_free_bytes += bytes;

	pstat = search_page(page, false);
	if (pstat == NULL) {
		pr_debug2("free without matching alloc at page %"PRIx64" (order: %u)\n",
			  page, order);

		nr_page_nomatch++;
		total_page_nomatch_bytes += bytes;

		return 0;
	}

	this.page = page;
	this.gfp_flags = pstat->gfp_flags;
	this.migrate_type = pstat->migrate_type;

	rb_erase(&pstat->node, &page_tree);
	free(pstat);

	pstat = search_page_alloc_stat(&this, false);
	if (pstat == NULL)
		return -ENOENT;

	pstat->nr_free++;
	pstat->free_bytes += bytes;

	return 0;
}

typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		return f(evsel, sample);
	}

	return 0;
}

static struct perf_tool perf_kmem = {
	.sample		 = process_sample_event,
	.comm		 = perf_event__process_comm,
	.mmap		 = perf_event__process_mmap,
	.mmap2		 = perf_event__process_mmap2,
	.ordered_events	 = true,
};

static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_slab_result(struct rb_root *root,
				struct perf_session *session,
				int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine = &session->machines.host;

	printf("%.105s\n", graph_dotted_line);
	printf(" %-34s |", is_caller ? "Callsite" : "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
	printf("%.105s\n", graph_dotted_line);

	next = rb_first(root);

	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64, sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64, addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");

	printf("%.105s\n", graph_dotted_line);
}

static const char * const migrate_type_str[] = {
	"UNMOVABL",
	"RECLAIM",
	"MOVABLE",
	"RESERVED",
	"CMA/ISLT",
	"UNKNOWN",
};

static void __print_page_result(struct rb_root *root,
				struct perf_session *session __maybe_unused,
				int n_lines)
{
	struct rb_node *next = rb_first(root);
	const char *format;

	printf("\n%.80s\n", graph_dotted_line);
	printf(" %-16s | Total alloc (KB) | Hits      | Order | Mig.type | GFP flags\n",
	       use_pfn ? "PFN" : "Page");
	printf("%.80s\n", graph_dotted_line);

	if (use_pfn)
		format = " %16llu | %'16llu | %'9d | %5d | %8s |  %08lx\n";
	else
		format = " %016llx | %'16llu | %'9d | %5d | %8s |  %08lx\n";

	while (next && n_lines--) {
		struct page_stat *data;

		data = rb_entry(next, struct page_stat, node);

		printf(format, (unsigned long long)data->page,
		       (unsigned long long)data->alloc_bytes / 1024,
		       data->nr_alloc, data->order,
		       migrate_type_str[data->migrate_type],
		       (unsigned long)data->gfp_flags);

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ...              | ...              | ...       | ...   | ...      | ...     \n");

	printf("%.80s\n", graph_dotted_line);
}

static void print_slab_summary(void)
{
	printf("\nSUMMARY (SLAB allocator)");
	printf("\n========================\n");
	printf("Total bytes requested: %'lu\n", total_requested);
	printf("Total bytes allocated: %'lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %'lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
}

static void print_page_summary(void)
{
	int o, m;
	u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
	u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;

	printf("\nSUMMARY (page allocator)");
	printf("\n========================\n");
	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
	       nr_page_allocs, total_page_alloc_bytes / 1024);
	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
	       nr_page_frees, total_page_free_bytes / 1024);
	printf("\n");

	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
	       nr_alloc_freed, total_alloc_freed_bytes / 1024);
	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
	       nr_page_allocs - nr_alloc_freed,
	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
	       nr_page_nomatch, total_page_nomatch_bytes / 1024);
	printf("\n");

	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
	       nr_page_fails, total_page_fail_bytes / 1024);
	printf("\n");

	printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order", "Unmovable",
	       "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
	printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
	       graph_dotted_line, graph_dotted_line);

	for (o = 0; o < MAX_PAGE_ORDER; o++) {
		printf("%5d", o);
		for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
			if (order_stats[o][m])
				printf("  %'12d", order_stats[o][m]);
			else
				printf("  %12c", '.');
		}
		printf("\n");
	}
}

static void print_slab_result(struct perf_session *session)
{
	if (caller_flag)
		__print_slab_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_slab_summary();
}

static void print_page_result(struct perf_session *session)
{
	if (alloc_flag)
		__print_page_result(&page_alloc_sorted, session, alloc_lines);
	print_page_summary();
}

static void print_result(struct perf_session *session)
{
	if (kmem_slab)
		print_slab_result(session);
	if (kmem_page)
		print_page_result(session);
}

struct sort_dimension {
	const char		name[20];
	sort_fn_t		cmp;
	struct list_head	list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

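/*
 * Re-insert an entry into the sorted tree.  The comparators are tried in
 * --sort key order and the first non-zero result decides; bigger entries
 * go left, so an in-order walk visits them in descending order.
 */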
static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
			     struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
			       struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_slab_insert(root_sorted, data, sort_list);
	}
}

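/*
 * Sort pages by allocated bytes, descending: rb_first() of the sorted
 * tree yields the biggest consumer.
 */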
static void sort_page_insert(struct rb_root *root, struct page_stat *data)
{
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*new) {
		struct page_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct page_stat, node);
		parent = *new;

		/*
		 * TODO: support more sort keys.
		 *
		 * Compare explicitly rather than subtracting: the u64
		 * difference truncated to int could flip sign or hit zero.
		 */
		if (data->alloc_bytes > this->alloc_bytes)
			cmp = 1;
		else if (data->alloc_bytes < this->alloc_bytes)
			cmp = -1;

		if (cmp > 0)
			new = &parent->rb_left;
		else
			new = &parent->rb_right;
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

778
779static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
780{
781	struct rb_node *node;
782	struct page_stat *data;
783
784	for (;;) {
785		node = rb_first(root);
786		if (!node)
787			break;
788
789		rb_erase(node, root);
790		data = rb_entry(node, struct page_stat, node);
791		sort_page_insert(root_sorted, data);
792	}
793}
794
795static void sort_result(void)
796{
797	if (kmem_slab) {
798		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
799				   &alloc_sort);
800		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
801				   &caller_sort);
802	}
803	if (kmem_page) {
804		__sort_page_result(&page_alloc_tree, &page_alloc_sorted);
805	}
806}
807
static int __cmd_kmem(struct perf_session *session)
{
	int err = -EINVAL;
	struct perf_evsel *evsel;
	const struct perf_evsel_str_handler kmem_tracepoints[] = {
		/* slab allocator */
		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
		{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
		{ "kmem:kfree",			perf_evsel__process_free_event, },
		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
		/* page allocator */
		{ "kmem:mm_page_alloc",		perf_evsel__process_page_alloc_event, },
		{ "kmem:mm_page_free",		perf_evsel__process_page_free_event, },
	};

	if (!perf_session__has_traces(session, "kmem record"))
		goto out;

	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out;
	}

	evlist__for_each(session->evlist, evsel) {
		if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
		    perf_evsel__field(evsel, "pfn")) {
			use_pfn = true;
			break;
		}
	}

	setup_pager();
	err = perf_session__process_events(session);
	if (err != 0) {
		pr_err("error during process events: %d\n", err);
		goto out;
	}
	sort_result();
	print_result(session);
out:
	return err;
}

static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	double x, y;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

static struct sort_dimension *avail_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS	((int)ARRAY_SIZE(avail_sorts))

static int sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
		if (!strcmp(avail_sorts[i]->name, tok)) {
			sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

static int setup_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (caller_flag > alloc_flag)
		return setup_sorting(&caller_sort, arg);
	else
		return setup_sorting(&alloc_sort, arg);
}

static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_slab_opt(const struct option *opt __maybe_unused,
			  const char *arg __maybe_unused,
			  int unset __maybe_unused)
{
	kmem_slab = (kmem_page + 1);
	return 0;
}

static int parse_page_opt(const struct option *opt __maybe_unused,
			  const char *arg __maybe_unused,
			  int unset __maybe_unused)
{
	kmem_page = (kmem_slab + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

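/*
 * Build and run a "perf record" command line, appending the kmem
 * tracepoints for the selected allocator(s) and any user-supplied
 * arguments.
 */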
static int __cmd_record(int argc, const char **argv)
{
	const char * const record_args[] = {
	"record", "-a", "-R", "-c", "1",
	};
	const char * const slab_events[] = {
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
	};
	const char * const page_events[] = {
	"-e", "kmem:mm_page_alloc",
	"-e", "kmem:mm_page_free",
	};
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	if (kmem_slab)
		rec_argc += ARRAY_SIZE(slab_events);
	if (kmem_page)
		rec_argc += ARRAY_SIZE(page_events);

	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	if (kmem_slab) {
		for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
			rec_argv[i] = strdup(slab_events[j]);
	}
	if (kmem_page) {
		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
			rec_argv[i] = strdup(page_events[j]);
	}

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const default_sort_order = "frag,hit,bytes";
	struct perf_data_file file = {
		.mode = PERF_DATA_MODE_READ,
	};
	const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show symbol address, etc)"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics", parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics", parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag",
		     parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
	OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
			   parse_slab_opt),
	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
			   parse_page_opt),
	OPT_END()
	};
	const char *const kmem_subcommands[] = { "record", "stat", NULL };
	const char *kmem_usage[] = {
		NULL,
		NULL
	};
	struct perf_session *session;
	int ret = -1;

	argc = parse_options_subcommand(argc, argv, kmem_options,
					kmem_subcommands, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	if (kmem_slab == 0 && kmem_page == 0)
		kmem_slab = 1;  /* for backward compatibility */

	if (!strncmp(argv[0], "rec", 3)) {
		symbol__init(NULL);
		return __cmd_record(argc, argv);
	}

	file.path = input_name;

	session = perf_session__new(&file, false, &perf_kmem);
	if (session == NULL)
		return -1;

	if (kmem_page) {
		struct perf_evsel *evsel = perf_evlist__first(session->evlist);

		if (evsel == NULL || evsel->tp_format == NULL) {
			pr_err("invalid event found.. aborting\n");
			/* don't leak the session on the error path */
			goto out_delete;
		}

		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
	}

	symbol__init(&session->header.env);

	if (!strcmp(argv[0], "stat")) {
		setlocale(LC_ALL, "");

		if (cpu__setup_cpunode_map())
			goto out_delete;

		if (list_empty(&caller_sort))
			setup_sorting(&caller_sort, default_sort_order);
		if (list_empty(&alloc_sort))
			setup_sorting(&alloc_sort, default_sort_order);

		ret = __cmd_kmem(session);
	} else
		usage_with_options(kmem_usage, kmem_options);

out_delete:
	perf_session__delete(session);

	return ret;
}