1/* 2 * mem-memcpy.c 3 * 4 * memcpy: Simple memory copy in various ways 5 * 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 7 */ 8 9#include "../perf.h" 10#include "../util/util.h" 11#include "../util/parse-options.h" 12#include "../util/header.h" 13#include "../util/cloexec.h" 14#include "bench.h" 15#include "mem-memcpy-arch.h" 16#include "mem-memset-arch.h" 17 18#include <stdio.h> 19#include <stdlib.h> 20#include <string.h> 21#include <sys/time.h> 22#include <errno.h> 23 24#define K 1024 25 26static const char *length_str = "1MB"; 27static const char *routine = "default"; 28static int iterations = 1; 29static bool use_cycle; 30static int cycle_fd; 31static bool only_prefault; 32static bool no_prefault; 33 34static const struct option options[] = { 35 OPT_STRING('l', "length", &length_str, "1MB", 36 "Specify length of memory to copy. " 37 "Available units: B, KB, MB, GB and TB (upper and lower)"), 38 OPT_STRING('r', "routine", &routine, "default", 39 "Specify routine to copy, \"all\" runs all available routines"), 40 OPT_INTEGER('i', "iterations", &iterations, 41 "repeat memcpy() invocation this number of times"), 42 OPT_BOOLEAN('c', "cycle", &use_cycle, 43 "Use cycles event instead of gettimeofday() for measuring"), 44 OPT_BOOLEAN('o', "only-prefault", &only_prefault, 45 "Show only the result with page faults before memcpy()"), 46 OPT_BOOLEAN('n', "no-prefault", &no_prefault, 47 "Show only the result without page faults before memcpy()"), 48 OPT_END() 49}; 50 51typedef void *(*memcpy_t)(void *, const void *, size_t); 52typedef void *(*memset_t)(void *, int, size_t); 53 54struct routine { 55 const char *name; 56 const char *desc; 57 union { 58 memcpy_t memcpy; 59 memset_t memset; 60 } fn; 61}; 62 63struct routine memcpy_routines[] = { 64 { .name = "default", 65 .desc = "Default memcpy() provided by glibc", 66 .fn.memcpy = memcpy }, 67#ifdef HAVE_ARCH_X86_64_SUPPORT 68 69#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, 70#include "mem-memcpy-x86-64-asm-def.h" 71#undef MEMCPY_FN 72 73#endif 74 75 { NULL, 76 NULL, 77 {NULL} } 78}; 79 80static const char * const bench_mem_memcpy_usage[] = { 81 "perf bench mem memcpy <options>", 82 NULL 83}; 84 85static struct perf_event_attr cycle_attr = { 86 .type = PERF_TYPE_HARDWARE, 87 .config = PERF_COUNT_HW_CPU_CYCLES 88}; 89 90static void init_cycle(void) 91{ 92 cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 93 perf_event_open_cloexec_flag()); 94 95 if (cycle_fd < 0 && errno == ENOSYS) 96 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 97 else 98 BUG_ON(cycle_fd < 0); 99} 100 101static u64 get_cycle(void) 102{ 103 int ret; 104 u64 clk; 105 106 ret = read(cycle_fd, &clk, sizeof(u64)); 107 BUG_ON(ret != sizeof(u64)); 108 109 return clk; 110} 111 112static double timeval2double(struct timeval *ts) 113{ 114 return (double)ts->tv_sec + 115 (double)ts->tv_usec / (double)1000000; 116} 117 118#define pf (no_prefault ? 0 : 1) 119 120#define print_bps(x) do { \ 121 if (x < K) \ 122 printf(" %14lf B/Sec", x); \ 123 else if (x < K * K) \ 124 printf(" %14lfd KB/Sec", x / K); \ 125 else if (x < K * K * K) \ 126 printf(" %14lf MB/Sec", x / K / K); \ 127 else \ 128 printf(" %14lf GB/Sec", x / K / K / K); \ 129 } while (0) 130 131struct bench_mem_info { 132 const struct routine *routines; 133 u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); 134 double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); 135 const char *const *usage; 136}; 137 138static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) 139{ 140 const struct routine *r = &info->routines[r_idx]; 141 double result_bps[2]; 142 u64 result_cycle[2]; 143 144 result_cycle[0] = result_cycle[1] = 0ULL; 145 result_bps[0] = result_bps[1] = 0.0; 146 147 printf("Routine %s (%s)\n", r->name, r->desc); 148 149 if (bench_format == BENCH_FORMAT_DEFAULT) 150 printf("# Copying %s Bytes ...\n\n", length_str); 151 152 if (!only_prefault && !no_prefault) { 153 /* show both of results */ 154 if (use_cycle) { 155 result_cycle[0] = info->do_cycle(r, len, false); 156 result_cycle[1] = info->do_cycle(r, len, true); 157 } else { 158 result_bps[0] = info->do_gettimeofday(r, len, false); 159 result_bps[1] = info->do_gettimeofday(r, len, true); 160 } 161 } else { 162 if (use_cycle) 163 result_cycle[pf] = info->do_cycle(r, len, only_prefault); 164 else 165 result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); 166 } 167 168 switch (bench_format) { 169 case BENCH_FORMAT_DEFAULT: 170 if (!only_prefault && !no_prefault) { 171 if (use_cycle) { 172 printf(" %14lf Cycle/Byte\n", 173 (double)result_cycle[0] 174 / totallen); 175 printf(" %14lf Cycle/Byte (with prefault)\n", 176 (double)result_cycle[1] 177 / totallen); 178 } else { 179 print_bps(result_bps[0]); 180 printf("\n"); 181 print_bps(result_bps[1]); 182 printf(" (with prefault)\n"); 183 } 184 } else { 185 if (use_cycle) { 186 printf(" %14lf Cycle/Byte", 187 (double)result_cycle[pf] 188 / totallen); 189 } else 190 print_bps(result_bps[pf]); 191 192 printf("%s\n", only_prefault ? " (with prefault)" : ""); 193 } 194 break; 195 case BENCH_FORMAT_SIMPLE: 196 if (!only_prefault && !no_prefault) { 197 if (use_cycle) { 198 printf("%lf %lf\n", 199 (double)result_cycle[0] / totallen, 200 (double)result_cycle[1] / totallen); 201 } else { 202 printf("%lf %lf\n", 203 result_bps[0], result_bps[1]); 204 } 205 } else { 206 if (use_cycle) { 207 printf("%lf\n", (double)result_cycle[pf] 208 / totallen); 209 } else 210 printf("%lf\n", result_bps[pf]); 211 } 212 break; 213 default: 214 /* reaching this means there's some disaster: */ 215 die("unknown format: %d\n", bench_format); 216 break; 217 } 218} 219 220static int bench_mem_common(int argc, const char **argv, 221 const char *prefix __maybe_unused, 222 struct bench_mem_info *info) 223{ 224 int i; 225 size_t len; 226 double totallen; 227 228 argc = parse_options(argc, argv, options, 229 info->usage, 0); 230 231 if (no_prefault && only_prefault) { 232 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); 233 return 1; 234 } 235 236 if (use_cycle) 237 init_cycle(); 238 239 len = (size_t)perf_atoll((char *)length_str); 240 totallen = (double)len * iterations; 241 242 if ((s64)len <= 0) { 243 fprintf(stderr, "Invalid length:%s\n", length_str); 244 return 1; 245 } 246 247 /* same to without specifying either of prefault and no-prefault */ 248 if (only_prefault && no_prefault) 249 only_prefault = no_prefault = false; 250 251 if (!strncmp(routine, "all", 3)) { 252 for (i = 0; info->routines[i].name; i++) 253 __bench_mem_routine(info, i, len, totallen); 254 return 0; 255 } 256 257 for (i = 0; info->routines[i].name; i++) { 258 if (!strcmp(info->routines[i].name, routine)) 259 break; 260 } 261 if (!info->routines[i].name) { 262 printf("Unknown routine:%s\n", routine); 263 printf("Available routines...\n"); 264 for (i = 0; info->routines[i].name; i++) { 265 printf("\t%s ... %s\n", 266 info->routines[i].name, info->routines[i].desc); 267 } 268 return 1; 269 } 270 271 __bench_mem_routine(info, i, len, totallen); 272 273 return 0; 274} 275 276static void memcpy_alloc_mem(void **dst, void **src, size_t length) 277{ 278 *dst = zalloc(length); 279 if (!*dst) 280 die("memory allocation failed - maybe length is too large?\n"); 281 282 *src = zalloc(length); 283 if (!*src) 284 die("memory allocation failed - maybe length is too large?\n"); 285 /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ 286 memset(*src, 0, length); 287} 288 289static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) 290{ 291 u64 cycle_start = 0ULL, cycle_end = 0ULL; 292 void *src = NULL, *dst = NULL; 293 memcpy_t fn = r->fn.memcpy; 294 int i; 295 296 memcpy_alloc_mem(&dst, &src, len); 297 298 if (prefault) 299 fn(dst, src, len); 300 301 cycle_start = get_cycle(); 302 for (i = 0; i < iterations; ++i) 303 fn(dst, src, len); 304 cycle_end = get_cycle(); 305 306 free(src); 307 free(dst); 308 return cycle_end - cycle_start; 309} 310 311static double do_memcpy_gettimeofday(const struct routine *r, size_t len, 312 bool prefault) 313{ 314 struct timeval tv_start, tv_end, tv_diff; 315 memcpy_t fn = r->fn.memcpy; 316 void *src = NULL, *dst = NULL; 317 int i; 318 319 memcpy_alloc_mem(&dst, &src, len); 320 321 if (prefault) 322 fn(dst, src, len); 323 324 BUG_ON(gettimeofday(&tv_start, NULL)); 325 for (i = 0; i < iterations; ++i) 326 fn(dst, src, len); 327 BUG_ON(gettimeofday(&tv_end, NULL)); 328 329 timersub(&tv_end, &tv_start, &tv_diff); 330 331 free(src); 332 free(dst); 333 return (double)(((double)len * iterations) / timeval2double(&tv_diff)); 334} 335 336int bench_mem_memcpy(int argc, const char **argv, 337 const char *prefix __maybe_unused) 338{ 339 struct bench_mem_info info = { 340 .routines = memcpy_routines, 341 .do_cycle = do_memcpy_cycle, 342 .do_gettimeofday = do_memcpy_gettimeofday, 343 .usage = bench_mem_memcpy_usage, 344 }; 345 346 return bench_mem_common(argc, argv, prefix, &info); 347} 348 349static void memset_alloc_mem(void **dst, size_t length) 350{ 351 *dst = zalloc(length); 352 if (!*dst) 353 die("memory allocation failed - maybe length is too large?\n"); 354} 355 356static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) 357{ 358 u64 cycle_start = 0ULL, cycle_end = 0ULL; 359 memset_t fn = r->fn.memset; 360 void *dst = NULL; 361 int i; 362 363 memset_alloc_mem(&dst, len); 364 365 if (prefault) 366 fn(dst, -1, len); 367 368 cycle_start = get_cycle(); 369 for (i = 0; i < iterations; ++i) 370 fn(dst, i, len); 371 cycle_end = get_cycle(); 372 373 free(dst); 374 return cycle_end - cycle_start; 375} 376 377static double do_memset_gettimeofday(const struct routine *r, size_t len, 378 bool prefault) 379{ 380 struct timeval tv_start, tv_end, tv_diff; 381 memset_t fn = r->fn.memset; 382 void *dst = NULL; 383 int i; 384 385 memset_alloc_mem(&dst, len); 386 387 if (prefault) 388 fn(dst, -1, len); 389 390 BUG_ON(gettimeofday(&tv_start, NULL)); 391 for (i = 0; i < iterations; ++i) 392 fn(dst, i, len); 393 BUG_ON(gettimeofday(&tv_end, NULL)); 394 395 timersub(&tv_end, &tv_start, &tv_diff); 396 397 free(dst); 398 return (double)(((double)len * iterations) / timeval2double(&tv_diff)); 399} 400 401static const char * const bench_mem_memset_usage[] = { 402 "perf bench mem memset <options>", 403 NULL 404}; 405 406static const struct routine memset_routines[] = { 407 { .name ="default", 408 .desc = "Default memset() provided by glibc", 409 .fn.memset = memset }, 410#ifdef HAVE_ARCH_X86_64_SUPPORT 411 412#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, 413#include "mem-memset-x86-64-asm-def.h" 414#undef MEMSET_FN 415 416#endif 417 418 { .name = NULL, 419 .desc = NULL, 420 .fn.memset = NULL } 421}; 422 423int bench_mem_memset(int argc, const char **argv, 424 const char *prefix __maybe_unused) 425{ 426 struct bench_mem_info info = { 427 .routines = memset_routines, 428 .do_cycle = do_memset_cycle, 429 .do_gettimeofday = do_memset_gettimeofday, 430 .usage = bench_mem_memset_usage, 431 }; 432 433 return bench_mem_common(argc, argv, prefix, &info); 434} 435