This source file includes the following definitions.
- dcbf
- err_msg
- compute_chunk_start_addr
- compute_word_offset
- compute_store_pattern
- extract_tid
- extract_word_offset
- extract_sweep_id
- start_verification_log
- log_anamoly
- end_verification_log
- verify_chunk
- set_pthread_cpu
- set_mycpu
- segv_handler
- set_segv_handler
- rim_fn
- mem_snapshot_fn
- alrm_sighandler
- main
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 #define _GNU_SOURCE
  19 #include <stdio.h>
  20 #include <sys/mman.h>
  21 #include <sys/types.h>
  22 #include <sys/wait.h>
  23 #include <sys/ipc.h>
  24 #include <sys/shm.h>
  25 #include <sys/stat.h>
  26 #include <sys/time.h>
  27 #include <linux/futex.h>
  28 #include <unistd.h>
  29 #include <asm/unistd.h>
  30 #include <string.h>
  31 #include <stdlib.h>
  32 #include <fcntl.h>
  33 #include <sched.h>
  34 #include <time.h>
  35 #include <stdarg.h>
  36 #include <sched.h>
  37 #include <pthread.h>
  38 #include <signal.h>
  39 #include <sys/prctl.h>
  40 
  41 static inline void dcbf(volatile unsigned int *addr)
  42 {
  43         __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
  44 }
  45 
  46 static void err_msg(char *msg)
  47 {
  48 
  49         time_t now;
  50         time(&now);
  51         printf("=================================\n");
  52         printf("    Error: %s\n", msg);
  53         printf("    %s", ctime(&now));
  54         printf("=================================\n");
  55         exit(1);
  56 }
  57 
  58 static char *map1;
  59 static char *map2;
  60 static pid_t rim_process_pid;
  61 
  62 
  63 
  64 
  65 
  66 
  67 
  68 
  69 
  70 
  71 
  72 
  73 
  74 
  75 
  76 
  77 
  78 
  79 
  80 
  81 
  82 static volatile unsigned int corruption_found;
  83 
  84 
  85 
  86 
  87 
  88 
  89 
  90 
  91 
  92 
  93 #define MAX_THREADS             64
  94 #define THREAD_ID_BITS          8
  95 #define THREAD_ID_MASK          ((1 << THREAD_ID_BITS) - 1)
  96 static unsigned int rim_thread_ids[MAX_THREADS];
  97 static pthread_t rim_threads[MAX_THREADS];
  98 
  99 
 100 
 101 
 102 
 103 
 104 
 105 
 106 
 107 
 108 
 109 #define RIM_CHUNK_SIZE          1024
 110 #define BITS_PER_BYTE           8
 111 #define WORD_SIZE               (sizeof(unsigned int))
 112 #define WORD_BITS               (WORD_SIZE * BITS_PER_BYTE)
 113 #define WORDS_PER_CHUNK         (RIM_CHUNK_SIZE/WORD_SIZE)
 114 
 115 static inline char *compute_chunk_start_addr(unsigned int thread_id)
 116 {
 117         char *chunk_start;
 118 
 119         chunk_start = (char *)((unsigned long)map1 +
 120                                (thread_id * RIM_CHUNK_SIZE));
 121 
 122         return chunk_start;
 123 }
 124 
 125 
 126 
 127 
 128 
 129 
 130 
 131 
 132 
 133 #define WORD_OFFSET_BITS        (__builtin_ctz(WORDS_PER_CHUNK))
 134 #define WORD_OFFSET_MASK        ((1 << WORD_OFFSET_BITS) - 1)
 135 
 136 static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
 137 {
 138         unsigned int delta_bytes, ret;
 139         delta_bytes = (unsigned long)addr - (unsigned long)start;
 140 
 141         ret = delta_bytes/WORD_SIZE;
 142 
 143         return ret;
 144 }
 145 
 146 
 147 
 148 
 149 
 150 
 151 
 152 
 153 
 154 
 155 
 156 
 157 
 158 
 159 #define SWEEP_ID_BITS           (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
 160 #define SWEEP_ID_MASK           ((1 << SWEEP_ID_BITS) - 1)
 161 
 162 
 163 
 164 
 165 
 166 
 167 
 168 
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176 
 177 
 178 
 179 
 180 
 181 
 182 
 183 
 184 
 185 
 186 
 187 
 188 
 189 
 190 
 191 
 192 
 193 
 194 #define SWEEP_ID_SHIFT  0
 195 #define WORD_OFFSET_SHIFT       (SWEEP_ID_BITS)
 196 #define THREAD_ID_SHIFT         (WORD_OFFSET_BITS + SWEEP_ID_BITS)
 197 
 198 
 199 
 200 
 201 
 202 static inline unsigned int compute_store_pattern(unsigned int tid,
 203                                                  unsigned int *addr,
 204                                                  unsigned int sweep_id)
 205 {
 206         unsigned int ret = 0;
 207         char *start = compute_chunk_start_addr(tid);
 208         unsigned int word_offset = compute_word_offset(start, addr);
 209 
 210         ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
 211         ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
 212         ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
 213         return ret;
 214 }
 215 
 216 
 217 static inline unsigned int extract_tid(unsigned int pattern)
 218 {
 219         unsigned int ret;
 220 
 221         ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
 222         return ret;
 223 }
 224 
 225 
 226 static inline unsigned int extract_word_offset(unsigned int pattern)
 227 {
 228         unsigned int ret;
 229 
 230         ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
 231 
 232         return ret;
 233 }
 234 
 235 
 236 static inline unsigned int extract_sweep_id(unsigned int pattern)
 237 
 238 {
 239         unsigned int ret;
 240 
 241         ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
 242 
 243         return ret;
 244 }
 245 
 246 
 247 
 248 
 249 
 250 
 251 #define LOGDIR_NAME_SIZE 100
 252 static char logdir[LOGDIR_NAME_SIZE];
 253 
 254 static FILE *fp[MAX_THREADS];
 255 static const char logfilename[] ="Thread-%02d-Chunk";
 256 
 257 static inline void start_verification_log(unsigned int tid,
 258                                           unsigned int *addr,
 259                                           unsigned int cur_sweep_id,
 260                                           unsigned int prev_sweep_id)
 261 {
 262         FILE *f;
 263         char logfile[30];
 264         char path[LOGDIR_NAME_SIZE + 30];
 265         char separator[2] = "/";
 266         char *chunk_start = compute_chunk_start_addr(tid);
 267         unsigned int size = RIM_CHUNK_SIZE;
 268 
 269         sprintf(logfile, logfilename, tid);
 270         strcpy(path, logdir);
 271         strcat(path, separator);
 272         strcat(path, logfile);
 273         f = fopen(path, "w");
 274 
 275         if (!f) {
 276                 err_msg("Unable to create logfile\n");
 277         }
 278 
 279         fp[tid] = f;
 280 
 281         fprintf(f, "----------------------------------------------------------\n");
 282         fprintf(f, "PID                = %d\n", rim_process_pid);
 283         fprintf(f, "Thread id          = %02d\n", tid);
 284         fprintf(f, "Chunk Start Addr   = 0x%016lx\n", (unsigned long)chunk_start);
 285         fprintf(f, "Chunk Size         = %d\n", size);
 286         fprintf(f, "Next Store Addr    = 0x%016lx\n", (unsigned long)addr);
 287         fprintf(f, "Current sweep-id   = 0x%08x\n", cur_sweep_id);
 288         fprintf(f, "Previous sweep-id  = 0x%08x\n", prev_sweep_id);
 289         fprintf(f, "----------------------------------------------------------\n");
 290 }
 291 
 292 static inline void log_anamoly(unsigned int tid, unsigned int *addr,
 293                                unsigned int expected, unsigned int observed)
 294 {
 295         FILE *f = fp[tid];
 296 
 297         fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
 298                 tid, (unsigned long)addr, expected, observed);
 299         fprintf(f, "Thread %02d: Expected Thread id   = %02d\n", tid, extract_tid(expected));
 300         fprintf(f, "Thread %02d: Observed Thread id   = %02d\n", tid, extract_tid(observed));
 301         fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
 302         fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
 303         fprintf(f, "Thread %02d: Expected sweep-id    = 0x%x\n", tid, extract_sweep_id(expected));
 304         fprintf(f, "Thread %02d: Observed sweep-id    = 0x%x\n", tid, extract_sweep_id(observed));
 305         fprintf(f, "----------------------------------------------------------\n");
 306 }
 307 
 308 static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
 309 {
 310         FILE *f = fp[tid];
 311         char logfile[30];
 312         char path[LOGDIR_NAME_SIZE + 30];
 313         char separator[] = "/";
 314 
 315         fclose(f);
 316 
 317         if (nr_anamolies == 0) {
 318                 remove(path);
 319                 return;
 320         }
 321 
 322         sprintf(logfile, logfilename, tid);
 323         strcpy(path, logdir);
 324         strcat(path, separator);
 325         strcat(path, logfile);
 326 
 327         printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
 328                 tid, nr_anamolies, path);
 329 }
 330 
 331 
 332 
 333 
 334 
 335 
 336 
 337 
 338 
 339 
 340 
 341 
 342 
 343 
 344 
 345 
 346 
 347 
 348 
 349 
 350 
 351 
 352 
 353 
 354 
 355 
 356 
 357 
 358 static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
 359                   unsigned int cur_sweep_id,
 360                   unsigned int prev_sweep_id)
 361 {
 362         unsigned int *iter_ptr;
 363         unsigned int size = RIM_CHUNK_SIZE;
 364         unsigned int expected;
 365         unsigned int observed;
 366         char *chunk_start = compute_chunk_start_addr(tid);
 367 
 368         int nr_anamolies = 0;
 369 
 370         start_verification_log(tid, next_store_addr,
 371                                cur_sweep_id, prev_sweep_id);
 372 
 373         for (iter_ptr = (unsigned int *)chunk_start;
 374              (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
 375              iter_ptr++) {
 376                 unsigned int expected_sweep_id;
 377 
 378                 if (iter_ptr < next_store_addr) {
 379                         expected_sweep_id = cur_sweep_id;
 380                 } else {
 381                         expected_sweep_id = prev_sweep_id;
 382                 }
 383 
 384                 expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
 385 
 386                 dcbf((volatile unsigned int*)iter_ptr); 
 387                 observed = *iter_ptr;
 388 
 389                 if (observed != expected) {
 390                         nr_anamolies++;
 391                         log_anamoly(tid, iter_ptr, expected, observed);
 392                 }
 393         }
 394 
 395         end_verification_log(tid, nr_anamolies);
 396 }
 397 
 398 static void set_pthread_cpu(pthread_t th, int cpu)
 399 {
 400         cpu_set_t run_cpu_mask;
 401         struct sched_param param;
 402 
 403         CPU_ZERO(&run_cpu_mask);
 404         CPU_SET(cpu, &run_cpu_mask);
 405         pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
 406 
 407         param.sched_priority = 1;
 408         if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) {
 409                 
 410                 fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
 411         }
 412 }
 413 
 414 static void set_mycpu(int cpu)
 415 {
 416         cpu_set_t run_cpu_mask;
 417         struct sched_param param;
 418 
 419         CPU_ZERO(&run_cpu_mask);
 420         CPU_SET(cpu, &run_cpu_mask);
 421         sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
 422 
 423         param.sched_priority = 1;
 424         if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) {
 425                 fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
 426         }
 427 }
 428 
 429 static volatile int segv_wait;
 430 
 431 static void segv_handler(int signo, siginfo_t *info, void *extra)
 432 {
 433         while (segv_wait) {
 434                 sched_yield();
 435         }
 436 
 437 }
 438 
 439 static void set_segv_handler(void)
 440 {
 441         struct sigaction sa;
 442 
 443         sa.sa_flags = SA_SIGINFO;
 444         sa.sa_sigaction = segv_handler;
 445 
 446         if (sigaction(SIGSEGV, &sa, NULL) == -1) {
 447                 perror("sigaction");
 448                 exit(EXIT_FAILURE);
 449         }
 450 }
 451 
 452 int timeout = 0;
 453 
 454 
 455 
 456 
 457 
 458 
 459 static void *rim_fn(void *arg)
 460 {
 461         unsigned int tid = *((unsigned int *)arg);
 462 
 463         int size = RIM_CHUNK_SIZE;
 464         char *chunk_start = compute_chunk_start_addr(tid);
 465 
 466         unsigned int prev_sweep_id;
 467         unsigned int cur_sweep_id = 0;
 468 
 469         
 470         unsigned int pattern = cur_sweep_id;
 471         unsigned int *pattern_ptr = &pattern;
 472         unsigned int *w_ptr, read_data;
 473 
 474         set_segv_handler();
 475 
 476         
 477 
 478 
 479 
 480 
 481 
 482 
 483 
 484 
 485         for (w_ptr = (unsigned int *)chunk_start;
 486              (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
 487              w_ptr++) {
 488 
 489                 *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
 490                 *w_ptr = *pattern_ptr;
 491         }
 492 
 493         while (!corruption_found && !timeout) {
 494                 prev_sweep_id = cur_sweep_id;
 495                 cur_sweep_id = cur_sweep_id + 1;
 496 
 497                 for (w_ptr = (unsigned int *)chunk_start;
 498                      (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
 499                      w_ptr++)  {
 500                         unsigned int old_pattern;
 501 
 502                         
 503 
 504 
 505 
 506 
 507                         old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
 508 
 509                         
 510 
 511 
 512 
 513                         dcbf((volatile unsigned int*)w_ptr); 
 514 
 515                         
 516                         read_data = *w_ptr; 
 517 
 518                         
 519 
 520 
 521 
 522                         if (read_data != old_pattern) {
 523                                 
 524                                 corruption_found = 1;
 525                         }
 526 
 527                         
 528 
 529 
 530 
 531                         if (corruption_found || timeout) {
 532                                 
 533 
 534 
 535 
 536 
 537 
 538 
 539                                 
 540                                 verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
 541 
 542                                 return 0;
 543                         }
 544 
 545                         
 546 
 547 
 548 
 549                         *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
 550 
 551                         
 552 
 553 
 554 
 555                         *w_ptr = *pattern_ptr;
 556                 }
 557         }
 558 
 559         return NULL;
 560 }
 561 
 562 
 563 static unsigned long start_cpu = 0;
 564 static unsigned long nrthreads = 4;
 565 
 566 static pthread_t mem_snapshot_thread;
 567 
 568 static void *mem_snapshot_fn(void *arg)
 569 {
 570         int page_size = getpagesize();
 571         size_t size = page_size;
 572         void *tmp = malloc(size);
 573 
 574         while (!corruption_found && !timeout) {
 575                 
 576                 segv_wait = 1;
 577 
 578                 mprotect(map1, size, PROT_READ);
 579 
 580                 
 581 
 582 
 583 
 584                 memcpy(tmp, map1, size);
 585 
 586                 
 587 
 588 
 589 
 590 
 591 
 592                 memcpy(map2, tmp, size);
 593                 
 594 
 595 
 596 
 597                 asm volatile("sync" ::: "memory");
 598                 mprotect(map1, size, PROT_READ|PROT_WRITE);
 599                 asm volatile("sync" ::: "memory");
 600                 segv_wait = 0;
 601 
 602                 usleep(1); 
 603         }
 604 
 605         return 0;
 606 }
 607 
 608 void alrm_sighandler(int sig)
 609 {
 610         timeout = 1;
 611 }
 612 
 613 int main(int argc, char *argv[])
 614 {
 615         int c;
 616         int page_size = getpagesize();
 617         time_t now;
 618         int i, dir_error;
 619         pthread_attr_t attr;
 620         key_t shm_key = (key_t) getpid();
 621         int shmid, run_time = 20 * 60;
 622         struct sigaction sa_alrm;
 623 
 624         snprintf(logdir, LOGDIR_NAME_SIZE,
 625                  "/tmp/logdir-%u", (unsigned int)getpid());
 626         while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
 627                 switch(c) {
 628                 case 'r':
 629                         start_cpu = strtoul(optarg, NULL, 10);
 630                         break;
 631                 case 'h':
 632                         printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
 633                         exit(0);
 634                         break;
 635                 case 'n':
 636                         nrthreads = strtoul(optarg, NULL, 10);
 637                         break;
 638                 case 'l':
 639                         strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
 640                         break;
 641                 case 't':
 642                         run_time = strtoul(optarg, NULL, 10);
 643                         break;
 644                 default:
 645                         printf("invalid option\n");
 646                         exit(0);
 647                         break;
 648                 }
 649         }
 650 
 651         if (nrthreads > MAX_THREADS)
 652                 nrthreads = MAX_THREADS;
 653 
 654         shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
 655         if (shmid < 0) {
 656                 err_msg("Failed shmget\n");
 657         }
 658 
 659         map1 = shmat(shmid, NULL, 0);
 660         if (map1 == (void *) -1) {
 661                 err_msg("Failed shmat");
 662         }
 663 
 664         map2 = shmat(shmid, NULL, 0);
 665         if (map2 == (void *) -1) {
 666                 err_msg("Failed shmat");
 667         }
 668 
 669         dir_error = mkdir(logdir, 0755);
 670 
 671         if (dir_error) {
 672                 err_msg("Failed mkdir");
 673         }
 674 
 675         printf("start_cpu list:%lu\n", start_cpu);
 676         printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
 677         printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
 678         printf("logdir at : %s\n", logdir);
 679         printf("Timeout: %d seconds\n", run_time);
 680 
 681         time(&now);
 682         printf("=================================\n");
 683         printf("     Starting Test\n");
 684         printf("     %s", ctime(&now));
 685         printf("=================================\n");
 686 
 687         for (i = 0; i < nrthreads; i++) {
 688                 if (1 && !fork()) {
 689                         prctl(PR_SET_PDEATHSIG, SIGKILL);
 690                         set_mycpu(start_cpu + i);
 691                         for (;;)
 692                                 sched_yield();
 693                         exit(0);
 694                 }
 695         }
 696 
 697 
 698         sa_alrm.sa_handler = &alrm_sighandler;
 699         sigemptyset(&sa_alrm.sa_mask);
 700         sa_alrm.sa_flags = 0;
 701 
 702         if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
 703                 err_msg("Failed signal handler registration\n");
 704         }
 705 
 706         alarm(run_time);
 707 
 708         pthread_attr_init(&attr);
 709         for (i = 0; i < nrthreads; i++) {
 710                 rim_thread_ids[i] = i;
 711                 pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
 712                 set_pthread_cpu(rim_threads[i], start_cpu + i);
 713         }
 714 
 715         pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
 716         set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
 717 
 718 
 719         pthread_join(mem_snapshot_thread, NULL);
 720         for (i = 0; i < nrthreads; i++) {
 721                 pthread_join(rim_threads[i], NULL);
 722         }
 723 
 724         if (!timeout) {
 725                 time(&now);
 726                 printf("=================================\n");
 727                 printf("      Data Corruption Detected\n");
 728                 printf("      %s", ctime(&now));
 729                 printf("      See logfiles in %s\n", logdir);
 730                 printf("=================================\n");
 731                 return 1;
 732         }
 733         return 0;
 734 }