arch/powerpc/kernel/vecemu.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes the following definitions.
eexp2
elog2
ctsxs
ctuxs
rfiz
rfii
rfin
emulate_altivec
   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Routines to emulate some Altivec/VMX instructions, specifically
   4  * those that can trap when given denormalized operands in Java mode.
   5  */
   6 #include <linux/kernel.h>
   7 #include <linux/errno.h>
   8 #include <linux/sched.h>
   9 #include <asm/ptrace.h>
  10 #include <asm/processor.h>
  11 #include <asm/switch_to.h>
  12 #include <linux/uaccess.h>
  13 
  14 /* Functions in vector.S */
  15 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  16 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  17 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  18 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  19 extern void vrefp(vector128 *dst, vector128 *src);
  20 extern void vrsqrtefp(vector128 *dst, vector128 *src);
  21 extern void vexptep(vector128 *dst, vector128 *src);
  22 
  23 static unsigned int exp2s[8] = {
  24         0x800000,
  25         0x8b95c2,
  26         0x9837f0,
  27         0xa5fed7,
  28         0xb504f3,
  29         0xc5672a,
  30         0xd744fd,
  31         0xeac0c7
  32 };
  33 
  34 /*
  35  * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  36  * single-precision floating-point representation of x.
  37  */
  38 static unsigned int eexp2(unsigned int s)
  39 {
  40         int exp, pwr;
  41         unsigned int mant, frac;
  42 
  43         /* extract exponent field from input */
  44         exp = ((s >> 23) & 0xff) - 127;
  45         if (exp > 7) {
  46                 /* check for NaN input */
  47                 if (exp == 128 && (s & 0x7fffff) != 0)
  48                         return s | 0x400000;    /* return QNaN */
  49                 /* 2^-big = 0, 2^+big = +Inf */
  50                 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  51         }
  52         if (exp < -23)
  53                 return 0x3f800000;      /* 1.0 */
  54 
  55         /* convert to fixed point integer in 9.23 representation */
  56         pwr = (s & 0x7fffff) | 0x800000;
  57         if (exp > 0)
  58                 pwr <<= exp;
  59         else
  60                 pwr >>= -exp;
  61         if (s & 0x80000000)
  62                 pwr = -pwr;
  63 
  64         /* extract integer part, which becomes exponent part of result */
  65         exp = (pwr >> 23) + 126;
  66         if (exp >= 254)
  67                 return 0x7f800000;
  68         if (exp < -23)
  69                 return 0;
  70 
  71         /* table lookup on top 3 bits of fraction to get mantissa */
  72         mant = exp2s[(pwr >> 20) & 7];
  73 
  74         /* linear interpolation using remaining 20 bits of fraction */
  75         asm("mulhwu %0,%1,%2" : "=r" (frac)
  76             : "r" (pwr << 12), "r" (0x172b83ff));
  77         asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  78         mant += frac;
  79 
  80         if (exp >= 0)
  81                 return mant + (exp << 23);
  82 
  83         /* denormalized result */
  84         exp = -exp;
  85         mant += 1 << (exp - 1);
  86         return mant >> exp;
  87 }
  88 
  89 /*
  90  * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  91  * single-precision floating-point representation of x.
  92  */
  93 static unsigned int elog2(unsigned int s)
  94 {
  95         int exp, mant, lz, frac;
  96 
  97         exp = s & 0x7f800000;
  98         mant = s & 0x7fffff;
  99         if (exp == 0x7f800000) {        /* Inf or NaN */
 100                 if (mant != 0)
 101                         s |= 0x400000;  /* turn NaN into QNaN */
 102                 return s;
 103         }
 104         if ((exp | mant) == 0)          /* +0 or -0 */
 105                 return 0xff800000;      /* return -Inf */
 106 
 107         if (exp == 0) {
 108                 /* denormalized */
 109                 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 110                 mant <<= lz - 8;
 111                 exp = (-118 - lz) << 23;
 112         } else {
 113                 mant |= 0x800000;
 114                 exp -= 127 << 23;
 115         }
 116 
 117         if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 118                 exp |= 0x400000;                        /* 0.5 * 2^23 */
 119                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 120                     : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 121         }
 122         if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 123                 exp |= 0x200000;                        /* 0.25 * 2^23 */
 124                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 125                     : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 126         }
 127         if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 128                 exp |= 0x100000;                        /* 0.125 * 2^23 */
 129                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 130                     : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 131         }
 132         if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 133                 /* calculate (mant - 1) * 1.381097463 */
 134                 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 135                 asm("mulhwu %0,%1,%2" : "=r" (frac)
 136                     : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 137                 exp += frac;
 138         }
 139         s = exp & 0x80000000;
 140         if (exp != 0) {
 141                 if (s)
 142                         exp = -exp;
 143                 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 144                 lz = 8 - lz;
 145                 if (lz > 0)
 146                         exp >>= lz;
 147                 else if (lz < 0)
 148                         exp <<= -lz;
 149                 s += ((lz + 126) << 23) + exp;
 150         }
 151         return s;
 152 }
 153 
 154 #define VSCR_SAT        1
 155 
 156 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 157 {
 158         int exp, mant;
 159 
 160         exp = (x >> 23) & 0xff;
 161         mant = x & 0x7fffff;
 162         if (exp == 255 && mant != 0)
 163                 return 0;               /* NaN -> 0 */
 164         exp = exp - 127 + scale;
 165         if (exp < 0)
 166                 return 0;               /* round towards zero */
 167         if (exp >= 31) {
 168                 /* saturate, unless the result would be -2^31 */
 169                 if (x + (scale << 23) != 0xcf000000)
 170                         *vscrp |= VSCR_SAT;
 171                 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 172         }
 173         mant |= 0x800000;
 174         mant = (mant << 7) >> (30 - exp);
 175         return (x & 0x80000000)? -mant: mant;
 176 }
 177 
 178 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 179 {
 180         int exp;
 181         unsigned int mant;
 182 
 183         exp = (x >> 23) & 0xff;
 184         mant = x & 0x7fffff;
 185         if (exp == 255 && mant != 0)
 186                 return 0;               /* NaN -> 0 */
 187         exp = exp - 127 + scale;
 188         if (exp < 0)
 189                 return 0;               /* round towards zero */
 190         if (x & 0x80000000) {
 191                 /* negative => saturate to 0 */
 192                 *vscrp |= VSCR_SAT;
 193                 return 0;
 194         }
 195         if (exp >= 32) {
 196                 /* saturate */
 197                 *vscrp |= VSCR_SAT;
 198                 return 0xffffffff;
 199         }
 200         mant |= 0x800000;
 201         mant = (mant << 8) >> (31 - exp);
 202         return mant;
 203 }
 204 
 205 /* Round to floating integer, towards 0 */
 206 static unsigned int rfiz(unsigned int x)
 207 {
 208         int exp;
 209 
 210         exp = ((x >> 23) & 0xff) - 127;
 211         if (exp == 128 && (x & 0x7fffff) != 0)
 212                 return x | 0x400000;    /* NaN -> make it a QNaN */
 213         if (exp >= 23)
 214                 return x;               /* it's an integer already (or Inf) */
 215         if (exp < 0)
 216                 return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 217         return x & ~(0x7fffff >> exp);
 218 }
 219 
 220 /* Round to floating integer, towards +/- Inf */
 221 static unsigned int rfii(unsigned int x)
 222 {
 223         int exp, mask;
 224 
 225         exp = ((x >> 23) & 0xff) - 127;
 226         if (exp == 128 && (x & 0x7fffff) != 0)
 227                 return x | 0x400000;    /* NaN -> make it a QNaN */
 228         if (exp >= 23)
 229                 return x;               /* it's an integer already (or Inf) */
 230         if ((x & 0x7fffffff) == 0)
 231                 return x;               /* +/-0 -> +/-0 */
 232         if (exp < 0)
 233                 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 234                 return (x & 0x80000000) | 0x3f800000;
 235         mask = 0x7fffff >> exp;
 236         /* mantissa overflows into exponent - that's OK,
 237            it can't overflow into the sign bit */
 238         return (x + mask) & ~mask;
 239 }
 240 
 241 /* Round to floating integer, to nearest */
 242 static unsigned int rfin(unsigned int x)
 243 {
 244         int exp, half;
 245 
 246         exp = ((x >> 23) & 0xff) - 127;
 247         if (exp == 128 && (x & 0x7fffff) != 0)
 248                 return x | 0x400000;    /* NaN -> make it a QNaN */
 249         if (exp >= 23)
 250                 return x;               /* it's an integer already (or Inf) */
 251         if (exp < -1)
 252                 return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 253         if (exp == -1)
 254                 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 255                 return (x & 0x80000000) | 0x3f800000;
 256         half = 0x400000 >> exp;
 257         /* add 0.5 to the magnitude and chop off the fraction bits */
 258         return (x + half) & ~(0x7fffff >> exp);
 259 }
 260 
 261 int emulate_altivec(struct pt_regs *regs)
 262 {
 263         unsigned int instr, i;
 264         unsigned int va, vb, vc, vd;
 265         vector128 *vrs;
 266 
 267         if (get_user(instr, (unsigned int __user *) regs->nip))
 268                 return -EFAULT;
 269         if ((instr >> 26) != 4)
 270                 return -EINVAL;         /* not an altivec instruction */
 271         vd = (instr >> 21) & 0x1f;
 272         va = (instr >> 16) & 0x1f;
 273         vb = (instr >> 11) & 0x1f;
 274         vc = (instr >> 6) & 0x1f;
 275 
 276         vrs = current->thread.vr_state.vr;
 277         switch (instr & 0x3f) {
 278         case 10:
 279                 switch (vc) {
 280                 case 0: /* vaddfp */
 281                         vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 282                         break;
 283                 case 1: /* vsubfp */
 284                         vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 285                         break;
 286                 case 4: /* vrefp */
 287                         vrefp(&vrs[vd], &vrs[vb]);
 288                         break;
 289                 case 5: /* vrsqrtefp */
 290                         vrsqrtefp(&vrs[vd], &vrs[vb]);
 291                         break;
 292                 case 6: /* vexptefp */
 293                         for (i = 0; i < 4; ++i)
 294                                 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 295                         break;
 296                 case 7: /* vlogefp */
 297                         for (i = 0; i < 4; ++i)
 298                                 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 299                         break;
 300                 case 8:         /* vrfin */
 301                         for (i = 0; i < 4; ++i)
 302                                 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 303                         break;
 304                 case 9:         /* vrfiz */
 305                         for (i = 0; i < 4; ++i)
 306                                 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 307                         break;
 308                 case 10:        /* vrfip */
 309                         for (i = 0; i < 4; ++i) {
 310                                 u32 x = vrs[vb].u[i];
 311                                 x = (x & 0x80000000)? rfiz(x): rfii(x);
 312                                 vrs[vd].u[i] = x;
 313                         }
 314                         break;
 315                 case 11:        /* vrfim */
 316                         for (i = 0; i < 4; ++i) {
 317                                 u32 x = vrs[vb].u[i];
 318                                 x = (x & 0x80000000)? rfii(x): rfiz(x);
 319                                 vrs[vd].u[i] = x;
 320                         }
 321                         break;
 322                 case 14:        /* vctuxs */
 323                         for (i = 0; i < 4; ++i)
 324                                 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 325                                         &current->thread.vr_state.vscr.u[3]);
 326                         break;
 327                 case 15:        /* vctsxs */
 328                         for (i = 0; i < 4; ++i)
 329                                 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 330                                         &current->thread.vr_state.vscr.u[3]);
 331                         break;
 332                 default:
 333                         return -EINVAL;
 334                 }
 335                 break;
 336         case 46:        /* vmaddfp */
 337                 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 338                 break;
 339         case 47:        /* vnmsubfp */
 340                 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 341                 break;
 342         default:
 343                 return -EINVAL;
 344         }
 345 
 346         return 0;
 347 }
/* [<][>][^][v][top][bottom][index][help] */
root/arch/powerpc/kernel/vecemu.c

DEFINITIONS