/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/sigcontext.h>
#include <asm/processor.h>
#include <asm/math_emu.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/user.h>

static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Disallow in-kernel use of the FPU on this CPU: while the flag is
 * set, interrupted_kernel_fpu_idle() (and thus irq_fpu_usable() from
 * a kernel-mode interrupt) returns false.
 */
void kernel_fpu_disable(void)
{
	WARN_ON(this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, true);
}

void kernel_fpu_enable(void)
{
	this_cpu_write(in_kernel_fpu, false);
}

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * We can do a kernel_fpu_begin/end() pair *ONLY* if that
 * pair does nothing at all: the thread must not have fpu (so
 * that we don't try to save the FPU state), and TS must
 * be set (so that the clts/stts pair does nothing that is
 * visible in the interrupted kernel thread).
 *
 * Except for the eagerfpu case when we return true; in the likely case
 * the thread has FPU but we are not going to set/clear TS.
 */
static inline bool interrupted_kernel_fpu_idle(void)
{
	if (this_cpu_read(in_kernel_fpu))
		return false;

	if (use_eager_fpu())
		return true;

	return !__thread_has_fpu(current) &&
		(read_cr0() & X86_CR0_TS);
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static inline bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();
	return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (i.e. "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);
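/*
 * Typical use from kernel code that wants the vector unit (a sketch;
 * kernel_fpu_begin()/kernel_fpu_end() are the preempt-disabling
 * wrappers around __kernel_fpu_begin()/__kernel_fpu_end() declared in
 * <asm/i387.h>):
 *
 *	if (irq_fpu_usable()) {
 *		kernel_fpu_begin();
 *		... SSE/AVX code ...
 *		kernel_fpu_end();
 *	}
 */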
void __kernel_fpu_begin(void)
{
	struct task_struct *me = current;

	this_cpu_write(in_kernel_fpu, true);

	if (__thread_has_fpu(me)) {
		__save_init_fpu(me);
	} else {
		this_cpu_write(fpu_owner_task, NULL);
		if (!use_eager_fpu())
			clts();
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
	struct task_struct *me = current;

	if (__thread_has_fpu(me)) {
		if (WARN_ON(restore_fpu_checking(me)))
			fpu_reset_state(me);
	} else if (!use_eager_fpu()) {
		stts();
	}

	this_cpu_write(in_kernel_fpu, false);
}
EXPORT_SYMBOL(__kernel_fpu_end);

void unlazy_fpu(struct task_struct *tsk)
{
	preempt_disable();
	if (__thread_has_fpu(tsk)) {
		if (use_eager_fpu()) {
			__save_fpu(tsk);
		} else {
			__save_init_fpu(tsk);
			__thread_fpu_end(tsk);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);

unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
static struct i387_fxsave_struct fx_scratch;

static void mxcsr_feature_mask_init(void)
{
	unsigned long mask = 0;

	if (cpu_has_fxsr) {
		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
		asm volatile("fxsave %0" : "+m" (fx_scratch));
		mask = fx_scratch.mxcsr_mask;
		if (mask == 0)
			mask = 0x0000ffbf;
	}
	mxcsr_feature_mask &= mask;
}
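/*
 * Example of the mask at work (a sketch): on a CPU whose fxsave image
 * reports mxcsr_mask == 0, the legacy default 0x0000ffbf applies, so a
 * user-supplied mxcsr of 0xffffffff is reduced to 0x0000ffbf - the
 * reserved high word and the DAZ bit (bit 6, absent on pre-DAZ CPUs)
 * are cleared before the value can reach ldmxcsr/fxrstor, where a set
 * reserved bit would raise #GP.
 */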
static void init_thread_xstate(void)
{
	/*
	 * Note that xstate_size might be overwritten later during
	 * xsave_init().
	 */

	if (!cpu_has_fpu) {
		/*
		 * Disable xsave as we do not support it if i387
		 * emulation is enabled.
		 */
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
		xstate_size = sizeof(struct i387_soft_struct);
		return;
	}

	if (cpu_has_fxsr)
		xstate_size = sizeof(struct i387_fxsave_struct);
	else
		xstate_size = sizeof(struct i387_fsave_struct);

	/*
	 * Quirk: we don't yet handle the XSAVES* instructions
	 * correctly, as we don't correctly convert between
	 * standard and compacted format when interfacing
	 * with user-space - so disable it for now.
	 *
	 * The difference is small: with recent CPUs the
	 * compacted format is only marginally smaller than
	 * the standard FPU state format.
	 *
	 * ( This is easy to backport while we are fixing
	 *   XSAVES* support. )
	 */
	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
}

/*
 * Called at bootup to set up the initial FPU state that is later cloned
 * into all processes.
 */
void fpu_init(void)
{
	unsigned long cr0;
	unsigned long cr4_mask = 0;

#ifndef CONFIG_MATH_EMULATION
	if (!cpu_has_fpu) {
		pr_emerg("No FPU found and no math emulation present\n");
		pr_emerg("Giving up\n");
		for (;;)
			asm volatile("hlt");
	}
#endif
	if (cpu_has_fxsr)
		cr4_mask |= X86_CR4_OSFXSR;
	if (cpu_has_xmm)
		cr4_mask |= X86_CR4_OSXMMEXCPT;
	if (cr4_mask)
		cr4_set_bits(cr4_mask);

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
	if (!cpu_has_fpu)
		cr0 |= X86_CR0_EM;
	write_cr0(cr0);

	/*
	 * init_thread_xstate() is only called once, to avoid overriding
	 * xstate_size during boot time or during CPU hotplug.
	 */
	if (xstate_size == 0)
		init_thread_xstate();

	mxcsr_feature_mask_init();
	xsave_init();
	eager_fpu_init();
}

void fpu_finit(struct fpu *fpu)
{
	if (!cpu_has_fpu) {
		finit_soft_fpu(&fpu->state->soft);
		return;
	}

	memset(fpu->state, 0, xstate_size);

	if (cpu_has_fxsr) {
		fx_finit(&fpu->state->fxsave);
	} else {
		struct i387_fsave_struct *fp = &fpu->state->fsave;

		fp->cwd = 0xffff037fu;
		fp->swd = 0xffff0000u;
		fp->twd = 0xffffffffu;
		fp->fos = 0xffff0000u;
	}
}
EXPORT_SYMBOL_GPL(fpu_finit);

/*
 * The _current_ task is using the FPU for the first time: initialize
 * its state (setting mxcsr to the reset default if we support XMM
 * instructions), then remember that the task has used the FPU.
 */
int init_fpu(struct task_struct *tsk)
{
	int ret;

	if (tsk_used_math(tsk)) {
		if (cpu_has_fpu && tsk == current)
			unlazy_fpu(tsk);
		task_disable_lazy_fpu_restore(tsk);
		return 0;
	}

	/*
	 * Memory allocation at the first usage of the FPU and other state.
	 */
	ret = fpu_alloc(&tsk->thread.fpu);
	if (ret)
		return ret;

	fpu_finit(&tsk->thread.fpu);

	set_stopped_child_used_math(tsk);
	return 0;
}
EXPORT_SYMBOL_GPL(init_fpu);
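/*
 * User-space view of the regset handlers below (a sketch; it assumes
 * the usual ptrace wiring, i.e. NT_PRFPREG served by the fpregs/xfpregs
 * handlers and NT_X86_XSTATE by the xstateregs handlers; handle() is a
 * hypothetical consumer):
 *
 *	struct user_i387_struct fp;
 *	struct iovec iov = { .iov_base = &fp, .iov_len = sizeof(fp) };
 *
 *	if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRFPREG, &iov) == 0)
 *		handle(&fp, iov.iov_len);
 */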
/*
 * The xstateregs_active() routine is the same as the fpregs_active()
 * routine, as the "regset->n" for the xstate regset will be updated
 * based on the feature capabilities supported by the xsave.
 */
int fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return tsk_used_math(target) ? regset->n : 0;
}

int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
{
	return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
}

int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
				   &target->thread.fpu.state->fxsave, 0, -1);
}

int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		const void *kbuf, const void __user *ubuf)
{
	int ret;

	if (!cpu_has_fxsr)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &target->thread.fpu.state->fxsave, 0, -1);

	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

	/*
	 * Update the header bits in the xsave header, indicating the
	 * presence of FP and SSE state.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

	return ret;
}

int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		void *kbuf, void __user *ubuf)
{
	struct xsave_struct *xsave;
	int ret;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	xsave = &target->thread.fpu.state->xsave;

	/*
	 * Copy the 48 bytes defined by the software first into the xstate
	 * memory layout in the thread struct, so that we can copy the entire
	 * xstateregs to the user using one user_regset_copyout().
	 */
	memcpy(&xsave->i387.sw_reserved,
		xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
	/*
	 * Copy the xstate memory layout.
	 */
	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
	return ret;
}

int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
		unsigned int pos, unsigned int count,
		const void *kbuf, const void __user *ubuf)
{
	struct xsave_struct *xsave;
	int ret;

	if (!cpu_has_xsave)
		return -ENODEV;

	ret = init_fpu(target);
	if (ret)
		return ret;

	xsave = &target->thread.fpu.state->xsave;

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
	/*
	 * mxcsr reserved bits must be masked to zero for security reasons.
	 */
	xsave->i387.mxcsr &= mxcsr_feature_mask;
	xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
	/*
	 * These bits must be zero.
	 */
	memset(&xsave->xsave_hdr.reserved, 0, 48);
	return ret;
}

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION

/*
 * FPU tag word conversions.
 */

static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp; /* to avoid 16 bit prefixes in the code */

	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
	tmp = ~twd;
	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
	/* and move the valid bits to the lower byte. */
	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */

	return tmp;
}
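/*
 * Worked example for twd_i387_to_fxsr(): an i387 tag word of 0xff3f
 * (every pair 11/empty except register 3, whose pair is 00/valid)
 * folds down as follows:
 *
 *	~0xff3f (low 16 bits)  = 0x00c0
 *	(t | t >> 1) & 0x5555  = 0x0040
 *	(t | t >> 1) & 0x3333  = 0x0020
 *	(t | t >> 2) & 0x0f0f  = 0x0008
 *	(t | t >> 4) & 0x00ff  = 0x0008
 *
 * i.e. only bit 3 of the FXSR tag byte ends up set, marking register 3
 * as non-empty.
 */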
#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)
#define FP_EXP_TAG_VALID	0
#define FP_EXP_TAG_ZERO		1
#define FP_EXP_TAG_SPECIAL	2
#define FP_EXP_TAG_EMPTY	3

static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
	struct _fpxreg *st;
	u32 tos = (fxsave->swd >> 11) & 7;
	u32 twd = (unsigned long) fxsave->twd;
	u32 tag;
	u32 ret = 0xffff0000u;
	int i;

	for (i = 0; i < 8; i++, twd >>= 1) {
		if (twd & 0x1) {
			st = FPREG_ADDR(fxsave, (i - tos) & 7);

			switch (st->exponent & 0x7fff) {
			case 0x7fff:
				tag = FP_EXP_TAG_SPECIAL;
				break;
			case 0x0000:
				if (!st->significand[0] &&
				    !st->significand[1] &&
				    !st->significand[2] &&
				    !st->significand[3])
					tag = FP_EXP_TAG_ZERO;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			default:
				if (st->significand[3] & 0x8000)
					tag = FP_EXP_TAG_VALID;
				else
					tag = FP_EXP_TAG_SPECIAL;
				break;
			}
		} else {
			tag = FP_EXP_TAG_EMPTY;
		}
		ret |= tag << (2 * i);
	}
	return ret;
}

/*
 * FXSR floating point environment conversions.
 */

void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	env->cwd = fxsave->cwd | 0xffff0000u;
	env->swd = fxsave->swd | 0xffff0000u;
	env->twd = twd_fxsr_to_i387(fxsave);

#ifdef CONFIG_X86_64
	env->fip = fxsave->rip;
	env->foo = fxsave->rdp;
	/*
	 * These should actually be ds/cs at FPU exception time, but
	 * that information is not available in 64-bit mode.
	 */
	env->fcs = task_pt_regs(tsk)->cs;
	if (tsk == current) {
		savesegment(ds, env->fos);
	} else {
		env->fos = tsk->thread.ds;
	}
	env->fos |= 0xffff0000;
#else
	env->fip = fxsave->fip;
	env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
	env->foo = fxsave->foo;
	env->fos = fxsave->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(to[0]));
}

void convert_to_fxsr(struct task_struct *tsk,
		     const struct user_i387_ia32_struct *env)
{
	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
	int i;

	fxsave->cwd = env->cwd;
	fxsave->swd = env->swd;
	fxsave->twd = twd_i387_to_fxsr(env->twd);
	fxsave->fop = (u16) ((u32) env->fcs >> 16);
#ifdef CONFIG_X86_64
	fxsave->rip = env->fip;
	fxsave->rdp = env->foo;
	/* cs and ds ignored */
#else
	fxsave->fip = env->fip;
	fxsave->fcs = (env->fcs & 0xffff);
	fxsave->foo = env->foo;
	fxsave->fos = env->fos;
#endif

	for (i = 0; i < 8; ++i)
		memcpy(&to[i], &from[i], sizeof(from[0]));
}
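/*
 * Layout note for the two converters above: struct _fpreg is the
 * packed 10-byte (80-bit) register image of the legacy i387 frame,
 * while struct _fpxreg pads each register out to the 16-byte slot
 * that fxsave uses (which is also why FPREG_ADDR has a stride of 16).
 * The memcpy()s therefore move sizeof(struct _fpreg) == 10 bytes per
 * register and leave the 6 padding bytes of each fxsave slot alone.
 */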
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       void *kbuf, void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = init_fpu(target);
	if (ret)
		return ret;

	if (!static_cpu_has(X86_FEATURE_FPU))
		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr)
		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
					   &target->thread.fpu.state->fsave, 0,
					   -1);

	sanitize_i387_state(target);

	if (kbuf && pos == 0 && count == sizeof(env)) {
		convert_from_fxsr(kbuf, target);
		return 0;
	}

	convert_from_fxsr(&env, target);

	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
}

int fpregs_set(struct task_struct *target, const struct user_regset *regset,
	       unsigned int pos, unsigned int count,
	       const void *kbuf, const void __user *ubuf)
{
	struct user_i387_ia32_struct env;
	int ret;

	ret = init_fpu(target);
	if (ret)
		return ret;

	sanitize_i387_state(target);

	if (!static_cpu_has(X86_FEATURE_FPU))
		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

	if (!cpu_has_fxsr)
		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
					  &target->thread.fpu.state->fsave, 0,
					  -1);

	if (pos > 0 || count < sizeof(env))
		convert_from_fxsr(&env, target);

	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
	if (!ret)
		convert_to_fxsr(target, &env);

	/*
	 * Update the header bit in the xsave header, indicating the
	 * presence of FP.
	 */
	if (cpu_has_xsave)
		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
	return ret;
}

/*
 * FPU state for core dumps.
 * This is only used for a.out dumps now.
 * It is declared generically using elf_fpregset_t (which is
 * struct user_i387_struct) but is in fact only used for 32-bit
 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
 */
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
{
	struct task_struct *tsk = current;
	int fpvalid;

	fpvalid = !!used_math();
	if (fpvalid)
		fpvalid = !fpregs_get(tsk, NULL,
				      0, sizeof(struct user_i387_ia32_struct),
				      fpu, NULL);

	return fpvalid;
}
EXPORT_SYMBOL(dump_fpu);

#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */

static int __init no_387(char *s)
{
	setup_clear_cpu_cap(X86_FEATURE_FPU);
	return 1;
}

__setup("no387", no_387);

void fpu_detect(struct cpuinfo_x86 *c)
{
	unsigned long cr0;
	u16 fsw, fcw;

	fsw = fcw = 0xffff;

	cr0 = read_cr0();
	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
	write_cr0(cr0);

	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
		     : "+m" (fsw), "+m" (fcw));

	if (fsw == 0 && (fcw & 0x103f) == 0x003f)
		set_cpu_cap(c, X86_FEATURE_FPU);
	else
		clear_cpu_cap(c, X86_FEATURE_FPU);

	/* The final cr0 value is set in fpu_init() */
}
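/*
 * Note on the probe above and the "no387" parameter: after fninit a
 * working FPU reads back fsw == 0x0000 and fcw == 0x037f, so the test
 * (fcw & 0x103f) == 0x003f succeeds; if no FPU responds, the
 * fnstsw/fnstcw stores never happen and both variables keep their
 * 0xffff preset.  Booting with "no387" clears X86_FEATURE_FPU up
 * front, which is only survivable with CONFIG_MATH_EMULATION=y -
 * otherwise fpu_init() halts the machine.
 */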