lib/raid6/sse2.c


DEFINITIONS

This source file includes the following definitions:
  1. raid6_have_sse2
  2. raid6_sse21_gen_syndrome
  3. raid6_sse21_xor_syndrome
  4. raid6_sse22_gen_syndrome
  5. raid6_sse22_xor_syndrome
  6. raid6_sse24_gen_syndrome
  7. raid6_sse24_xor_syndrome

// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_sse_constants {
        u64 x1d[2];
} raid6_sse_constants  __attribute__((aligned(16))) = {
        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};

static int raid6_have_sse2(void)
{
        /* Not really boot_cpu but "all_cpus" */
        return boot_cpu_has(X86_FEATURE_MMX) &&
                boot_cpu_has(X86_FEATURE_FXSR) &&
                boot_cpu_has(X86_FEATURE_XMM) &&
                boot_cpu_has(X86_FEATURE_XMM2);
}

/*
 * Plain SSE2 implementation
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */

        for ( d = 0 ; d < bytes ; d += 16 ) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
                asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
                asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
                for ( z = z0-2 ; z >= 0 ; z-- ) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm6,%xmm2");
                        asm volatile("pxor %xmm6,%xmm4");
                        asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
                }
                asm volatile("pcmpgtb %xmm4,%xmm5");
                asm volatile("paddb %xmm4,%xmm4");
                asm volatile("pand %xmm0,%xmm5");
                asm volatile("pxor %xmm5,%xmm4");
                asm volatile("pxor %xmm5,%xmm5");
                asm volatile("pxor %xmm6,%xmm2");
                asm volatile("pxor %xmm6,%xmm4");

                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("pxor %xmm2,%xmm2");
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("pxor %xmm4,%xmm4");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}


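/*
 * Reference sketch (hypothetical helpers, not part of the kernel build):
 * the pcmpgtb/paddb/pand/pxor sequence above is a 16-byte-wide multiply by
 * 2 in GF(2^8) with the 0x11d generator polynomial.  pcmpgtb against zero
 * yields 0xff in every byte whose top bit is set, paddb shifts each byte
 * left by one, and the masked XOR with the 0x1d constant folds the overflow
 * back in.  The movntdq stores bypass the cache because P/Q are not read
 * again here, and the trailing sfence orders them before kernel_fpu_end().
 * A scalar equivalent of one per-byte syndrome step might look like this:
 */
static inline u8 raid6_sketch_x2(u8 v)
{
        /* Multiply one GF(2^8) element by 2, reducing by 0x1d on overflow */
        return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
}

static inline void raid6_sketch_gen_step(u8 *p, u8 *q, const u8 *data,
                                         size_t bytes)
{
        size_t i;

        for (i = 0; i < bytes; i++) {
                q[i] = raid6_sketch_x2(q[i]) ^ data[i];  /* Q = 2*Q + data */
                p[i] ^= data[i];                         /* P is plain XOR */
        }
}
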
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

        for ( d = 0 ; d < bytes ; d += 16 ) {
                asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
                asm volatile("pxor %xmm4,%xmm2");
                /* P/Q data pages */
                for ( z = z0-1 ; z >= start ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm5,%xmm4");
                }
                /* P/Q left side optimization */
                for ( z = start-1 ; z >= 0 ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pxor %xmm5,%xmm4");
                }
                asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

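/*
 * Scalar sketch of the xor_syndrome update above, using the hypothetical
 * raid6_sketch_x2() helper defined earlier (illustration only).  Only data
 * disks start..stop are supplied, so the routine folds their contribution
 * into the existing P and Q: P gets a plain XOR, while the partial Q is
 * still multiplied by 2 once for every untouched lower-numbered disk (the
 * "left side") so it carries the correct GF(2^8) weight before being XORed
 * into the stored syndrome.
 */
static inline void raid6_sketch_xor_syndrome(int disks, int start, int stop,
                                             size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p = dptr[disks-2];
        u8 *q = dptr[disks-1];
        size_t i;
        int z;

        for (i = 0; i < bytes; i++) {
                u8 wq = dptr[stop][i];
                u8 wp = wq;

                /* Contribution of the changed disks stop-1..start */
                for (z = stop-1; z >= start; z--) {
                        wq = raid6_sketch_x2(wq) ^ dptr[z][i];
                        wp ^= dptr[z][i];
                }
                /* Untouched disks below start: only apply their weight */
                for (z = start-1; z >= 0; z--)
                        wq = raid6_sketch_x2(wq);

                p[i] ^= wp;
                q[i] ^= wq;
        }
}
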
const struct raid6_calls raid6_sse2x1 = {
        raid6_sse21_gen_syndrome,
        raid6_sse21_xor_syndrome,
        raid6_have_sse2,
        "sse2x1",
        1                       /* Has cache hints */
};

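/*
 * For reference, and assuming the field order declared for struct
 * raid6_calls in <linux/raid/pq.h>, the positional initializer above is
 * equivalent to:
 *
 *      .gen_syndrome = raid6_sse21_gen_syndrome,
 *      .xor_syndrome = raid6_sse21_xor_syndrome,
 *      .valid        = raid6_have_sse2,
 *      .name         = "sse2x1",
 *      .prefer       = 1,      (has cache hints)
 */
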
/*
 * Unrolled-by-2 SSE2 implementation
 */
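/*
 * The two-way unroll below processes 32 bytes per iteration with two
 * independent register sets (xmm2/xmm4/xmm5 and xmm3/xmm6/xmm7), which
 * helps hide the latency of the pcmpgtb/paddb/pand/pxor dependency chain.
 */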
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
        asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
        asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

        /* We uniformly assume a single prefetch covers at least 32 bytes */
        for ( d = 0 ; d < bytes ; d += 32 ) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
                asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
                asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
                asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
                for ( z = z0-1 ; z >= 0 ; z-- ) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                }
                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

        for ( d = 0 ; d < bytes ; d += 32 ) {
                asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
                asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
                asm volatile("pxor %xmm4,%xmm2");
                asm volatile("pxor %xmm6,%xmm3");
                /* P/Q data pages */
                for ( z = z0-1 ; z >= start ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                }
                /* P/Q left side optimization */
                for ( z = start-1 ; z >= 0 ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                }
                asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
                asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
                asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
                asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
        raid6_sse22_gen_syndrome,
        raid6_sse22_xor_syndrome,
        raid6_have_sse2,
        "sse2x2",
        1                       /* Has cache hints */
};

#ifdef CONFIG_X86_64

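/*
 * The four-way unrolled routines below keep P[0..3] and Q[0..3] live in
 * xmm2-xmm15; registers xmm8-xmm15 only exist in 64-bit mode, hence the
 * CONFIG_X86_64 guard.
 */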
/*
 * Unrolled-by-4 SSE2 implementation
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
        asm volatile("pxor %xmm2,%xmm2");       /* P[0] */
        asm volatile("pxor %xmm3,%xmm3");       /* P[1] */
        asm volatile("pxor %xmm4,%xmm4");       /* Q[0] */
        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */
        asm volatile("pxor %xmm6,%xmm6");       /* Q[1] */
        asm volatile("pxor %xmm7,%xmm7");       /* Zero temp */
        asm volatile("pxor %xmm10,%xmm10");     /* P[2] */
        asm volatile("pxor %xmm11,%xmm11");     /* P[3] */
        asm volatile("pxor %xmm12,%xmm12");     /* Q[2] */
        asm volatile("pxor %xmm13,%xmm13");     /* Zero temp */
        asm volatile("pxor %xmm14,%xmm14");     /* Q[3] */
        asm volatile("pxor %xmm15,%xmm15");     /* Zero temp */

        for ( d = 0 ; d < bytes ; d += 64 ) {
                for ( z = z0 ; z >= 0 ; z-- ) {
                        /* The second prefetch seems to improve performance... */
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("pcmpgtb %xmm12,%xmm13");
                        asm volatile("pcmpgtb %xmm14,%xmm15");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("paddb %xmm12,%xmm12");
                        asm volatile("paddb %xmm14,%xmm14");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pand %xmm0,%xmm13");
                        asm volatile("pand %xmm0,%xmm15");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
                        asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
                        asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm13,%xmm10");
                        asm volatile("pxor %xmm15,%xmm11");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pxor %xmm13,%xmm13");
                        asm volatile("pxor %xmm15,%xmm15");
                }
                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("pxor %xmm2,%xmm2");
                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
                asm volatile("pxor %xmm3,%xmm3");
                asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
                asm volatile("pxor %xmm10,%xmm10");
                asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
                asm volatile("pxor %xmm11,%xmm11");
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("pxor %xmm4,%xmm4");
                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
                asm volatile("pxor %xmm6,%xmm6");
                asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
                asm volatile("pxor %xmm12,%xmm12");
                asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
                asm volatile("pxor %xmm14,%xmm14");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

        for ( d = 0 ; d < bytes ; d += 64 ) {
                asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
                asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
                asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
                asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
                asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
                asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
                asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
                asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
                asm volatile("pxor %xmm4,%xmm2");
                asm volatile("pxor %xmm6,%xmm3");
                asm volatile("pxor %xmm12,%xmm10");
                asm volatile("pxor %xmm14,%xmm11");
                /* P/Q data pages */
                for ( z = z0-1 ; z >= start ; z-- ) {
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pxor %xmm13,%xmm13");
                        asm volatile("pxor %xmm15,%xmm15");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("pcmpgtb %xmm12,%xmm13");
                        asm volatile("pcmpgtb %xmm14,%xmm15");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("paddb %xmm12,%xmm12");
                        asm volatile("paddb %xmm14,%xmm14");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pand %xmm0,%xmm13");
                        asm volatile("pand %xmm0,%xmm15");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
                        asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
                        asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
                        asm volatile("pxor %xmm5,%xmm2");
                        asm volatile("pxor %xmm7,%xmm3");
                        asm volatile("pxor %xmm13,%xmm10");
                        asm volatile("pxor %xmm15,%xmm11");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                }
                asm volatile("prefetchnta %0" :: "m" (q[d]));
                asm volatile("prefetchnta %0" :: "m" (q[d+32]));
                /* P/Q left side optimization */
                for ( z = start-1 ; z >= 0 ; z-- ) {
                        asm volatile("pxor %xmm5,%xmm5");
                        asm volatile("pxor %xmm7,%xmm7");
                        asm volatile("pxor %xmm13,%xmm13");
                        asm volatile("pxor %xmm15,%xmm15");
                        asm volatile("pcmpgtb %xmm4,%xmm5");
                        asm volatile("pcmpgtb %xmm6,%xmm7");
                        asm volatile("pcmpgtb %xmm12,%xmm13");
                        asm volatile("pcmpgtb %xmm14,%xmm15");
                        asm volatile("paddb %xmm4,%xmm4");
                        asm volatile("paddb %xmm6,%xmm6");
                        asm volatile("paddb %xmm12,%xmm12");
                        asm volatile("paddb %xmm14,%xmm14");
                        asm volatile("pand %xmm0,%xmm5");
                        asm volatile("pand %xmm0,%xmm7");
                        asm volatile("pand %xmm0,%xmm13");
                        asm volatile("pand %xmm0,%xmm15");
                        asm volatile("pxor %xmm5,%xmm4");
                        asm volatile("pxor %xmm7,%xmm6");
                        asm volatile("pxor %xmm13,%xmm12");
                        asm volatile("pxor %xmm15,%xmm14");
                }
                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
                asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
                asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
                asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
                asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
                asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
                asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
                asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
                asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
        }
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}


const struct raid6_calls raid6_sse2x4 = {
        raid6_sse24_gen_syndrome,
        raid6_sse24_xor_syndrome,
        raid6_have_sse2,
        "sse2x4",
        1                       /* Has cache hints */
};

#endif /* CONFIG_X86_64 */
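
/*
 * None of these raid6_calls tables is used directly in this file; they are
 * referenced from the raid6_algos[] list in lib/raid6/algos.c, where
 * raid6_select_algo() checks ->valid() and benchmarks the candidates to
 * pick the fastest syndrome routines for the running machine.
 */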
