root/drivers/platform/x86/intel_ips.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ips_cpu_busy
  2. ips_cpu_raise
  3. ips_cpu_lower
  4. do_enable_cpu_turbo
  5. ips_enable_cpu_turbo
  6. do_disable_cpu_turbo
  7. ips_disable_cpu_turbo
  8. ips_gpu_busy
  9. ips_gpu_raise
  10. ips_gpu_lower
  11. ips_enable_gpu_turbo
  12. ips_disable_gpu_turbo
  13. mcp_exceeded
  14. cpu_exceeded
  15. mch_exceeded
  16. verify_limits
  17. update_turbo_limits
  18. ips_adjust
  19. calc_avg_temp
  20. read_mgtv
  21. read_ptv
  22. read_ctv
  23. get_cpu_power
  24. update_average_temp
  25. update_average_power
  26. calc_avg_power
  27. monitor_timeout
  28. ips_monitor
  29. dump_thermal_info
  30. ips_irq_handler
  31. ips_debugfs_init
  32. ips_debugfs_cleanup
  33. cpu_temp_show
  34. cpu_power_show
  35. cpu_clamp_show
  36. mch_temp_show
  37. mch_power_show
  38. ips_debugfs_cleanup
  39. ips_debugfs_init
  40. ips_detect_cpu
  41. ips_get_i915_syms
  42. ips_gpu_turbo_enabled
  43. ips_link_to_i915_driver
  44. ips_blacklist_callback
  45. ips_probe
  46. ips_remove

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (c) 2009-2010 Intel Corporation
   4  *
   5  * Authors:
   6  *      Jesse Barnes <jbarnes@virtuousgeek.org>
   7  */
   8 
   9 /*
  10  * Some Intel Ibex Peak based platforms support so-called "intelligent
  11  * power sharing", which allows the CPU and GPU to cooperate to maximize
  12  * performance within a given TDP (thermal design point).  This driver
  13  * performs the coordination between the CPU and GPU, monitors thermal and
  14  * power statistics in the platform, and initializes power monitoring
  15  * hardware.  It also provides a few tunables to control behavior.  Its
  16  * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
  17  * by tracking power and thermal budget; secondarily it can boost turbo
  18  * performance by allocating more power or thermal budget to the CPU or GPU
  19  * based on available headroom and activity.
  20  *
  21  * The basic algorithm is driven by a 5s moving average of temperature.  If
  22  * thermal headroom is available, the CPU and/or GPU power clamps may be
  23  * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
  24  * we scale back the clamp.  Aside from trigger events (when we're critically
  25  * close or over our TDP) we don't adjust the clamps more than once every
  26  * five seconds.
  27  *
  28  * The thermal device (device 31, function 6) has a set of registers that
  29  * are updated by the ME firmware.  The ME should also take the clamp values
  30  * written to those registers and write them to the CPU, but we currently
  31  * bypass that functionality and write the CPU MSR directly.
  32  *
  33  * UNSUPPORTED:
  34  *   - dual MCP configs
  35  *
  36  * TODO:
  37  *   - handle CPU hotplug
  38  *   - provide turbo enable/disable api
  39  *
  40  * Related documents:
  41  *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
  42  *   - CDI 401376 - Ibex Peak EDS
  43  *   - ref 26037, 26641 - IPS BIOS spec
  44  *   - ref 26489 - Nehalem BIOS writer's guide
  45  *   - ref 26921 - Ibex Peak BIOS Specification
  46  */
  47 
  48 #include <linux/debugfs.h>
  49 #include <linux/delay.h>
  50 #include <linux/interrupt.h>
  51 #include <linux/kernel.h>
  52 #include <linux/kthread.h>
  53 #include <linux/module.h>
  54 #include <linux/pci.h>
  55 #include <linux/sched.h>
  56 #include <linux/sched/loadavg.h>
  57 #include <linux/seq_file.h>
  58 #include <linux/string.h>
  59 #include <linux/tick.h>
  60 #include <linux/timer.h>
  61 #include <linux/dmi.h>
  62 #include <drm/i915_drm.h>
  63 #include <asm/msr.h>
  64 #include <asm/processor.h>
  65 #include "intel_ips.h"
  66 
  67 #include <linux/io-64-nonatomic-lo-hi.h>
  68 
  69 #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
  70 
  71 /*
  72  * Package level MSRs for monitor/control
  73  */
  74 #define PLATFORM_INFO   0xce
  75 #define   PLATFORM_TDP          (1<<29)
  76 #define   PLATFORM_RATIO        (1<<28)
  77 
  78 #define IA32_MISC_ENABLE        0x1a0
  79 #define   IA32_MISC_TURBO_EN    (1ULL<<38)
  80 
     /*
      * TURBO_POWER_CURRENT_LIMIT is read and rewritten by ips_cpu_raise() and
      * ips_cpu_lower().  Those helpers step the TURBO_TDP_MASK field by 8 for
      * a 1W change and set both *_OVR_EN bits before updating the field.
      */
  81 #define TURBO_POWER_CURRENT_LIMIT       0x1ac
  82 #define   TURBO_TDC_OVR_EN      (1UL<<31)
  83 #define   TURBO_TDC_MASK        (0x000000007fff0000UL)
  84 #define   TURBO_TDC_SHIFT       (16)
  85 #define   TURBO_TDP_OVR_EN      (1UL<<15)
  86 #define   TURBO_TDP_MASK        (0x0000000000003fffUL)
  87 
  88 /*
  89  * Core/thread MSRs for monitoring
  90  */
     /*
      * IA32_PERF_TURBO_DIS is cleared by do_enable_cpu_turbo() and set by
      * do_disable_cpu_turbo(), each run on every CPU via on_each_cpu().
      */
  91 #define IA32_PERF_CTL           0x199
  92 #define   IA32_PERF_TURBO_DIS   (1ULL<<32)
  93 
  94 /*
  95  * Thermal PCI device regs
  96  */
     /*
      * THM_CFG_* look like PCI config-space offsets (TODO confirm); the
      * remaining THM_* values are byte offsets that the thm_read*()/thm_write*()
      * accessors add to ips->regmap.  Indented names under a register appear to
      * be bit fields/masks of that register.
      */
  97 #define THM_CFG_TBAR    0x10
  98 #define THM_CFG_TBAR_HI 0x14
  99 
 100 #define THM_TSIU        0x00
 101 #define THM_TSE         0x01
 102 #define   TSE_EN        0xb8
 103 #define THM_TSS         0x02
 104 #define THM_TSTR        0x03
 105 #define THM_TSTTP       0x04
 106 #define THM_TSCO        0x08
 107 #define THM_TSES        0x0c
 108 #define THM_TSGPEN      0x0d
 109 #define   TSGPEN_HOT_LOHI       (1<<1)
 110 #define   TSGPEN_CRIT_LOHI      (1<<2)
 111 #define THM_TSPC        0x0e
 112 #define THM_PPEC        0x10
 113 #define THM_CTA         0x12
 114 #define THM_PTA         0x14
 115 #define   PTA_SLOPE_MASK        (0xff00)
 116 #define   PTA_SLOPE_SHIFT       8
 117 #define   PTA_OFFSET_MASK       (0x00ff)
     /* THM_MGTA supplies the slope/offset pair consumed by read_mgtv(). */
 118 #define THM_MGTA        0x16
 119 #define   MGTA_SLOPE_MASK       (0xff00)
 120 #define   MGTA_SLOPE_SHIFT      8
 121 #define   MGTA_OFFSET_MASK      (0x00ff)
 122 #define THM_TRC         0x1a
 123 #define   TRC_CORE2_EN  (1<<15)
 124 #define   TRC_THM_EN    (1<<12)
 125 #define   TRC_C6_WAR    (1<<8)
 126 #define   TRC_CORE1_EN  (1<<7)
 127 #define   TRC_CORE_PWR  (1<<6)
 128 #define   TRC_PCH_EN    (1<<5)
 129 #define   TRC_MCH_EN    (1<<4)
 130 #define   TRC_DIMM4     (1<<3)
 131 #define   TRC_DIMM3     (1<<2)
 132 #define   TRC_DIMM2     (1<<1)
 133 #define   TRC_DIMM1     (1<<0)
 134 #define THM_TES         0x20
 135 #define THM_TEN         0x21
 136 #define   TEN_UPDATE_EN 1
 137 #define THM_PSC         0x24
 138 #define   PSC_NTG       (1<<0) /* No GFX turbo support */
 139 #define   PSC_NTPC      (1<<1) /* No CPU turbo support */
 140 #define   PSC_PP_DEF    (0<<2) /* Perf policy up to driver */
 141 #define   PSP_PP_PC     (1<<2) /* BIOS prefers CPU perf */
 142 #define   PSP_PP_BAL    (2<<2) /* BIOS wants balanced perf */
 143 #define   PSP_PP_GFX    (3<<2) /* BIOS prefers GFX perf */
 144 #define   PSP_PBRT      (1<<4) /* BIOS run time support */
     /*
      * THM_CTV1/THM_CTV2 are read by read_ctv(), which checks CTV_TEMP_ERROR
      * and otherwise shifts the raw value right by 6.
      */
 145 #define THM_CTV1        0x30
 146 #define   CTV_TEMP_ERROR (1<<15)
 147 #define   CTV_TEMP_MASK 0x3f
     /* NOTE(review): CTV_ expands to nothing; looks like a truncated definition. */
 148 #define   CTV_
 149 #define THM_CTV2        0x32
 150 #define THM_CEC         0x34 /* undocumented power accumulator in joules */
 151 #define THM_AE          0x3f
     /* THM_HTS is read by update_turbo_limits() for the *_DIS turbo flags. */
 152 #define THM_HTS         0x50 /* 32 bits */
 153 #define   HTS_PCPL_MASK (0x7fe00000)
 154 #define   HTS_PCPL_SHIFT 21
 155 #define   HTS_GPL_MASK  (0x001ff000)
 156 #define   HTS_GPL_SHIFT 12
 157 #define   HTS_PP_MASK   (0x00000c00)
 158 #define   HTS_PP_SHIFT  10
 159 #define   HTS_PP_DEF    0
 160 #define   HTS_PP_PROC   1
 161 #define   HTS_PP_BAL    2
 162 #define   HTS_PP_GFX    3
 163 #define   HTS_PCTD_DIS  (1<<9)
 164 #define   HTS_GTD_DIS   (1<<8)
 165 #define   HTS_PTL_MASK  (0x000000fe)
 166 #define   HTS_PTL_SHIFT 1
 167 #define   HTS_NVV       (1<<0)
 168 #define THM_HTSHI       0x54 /* 16 bits */
 169 #define   HTS2_PPL_MASK         (0x03ff)
 170 #define   HTS2_PRST_MASK        (0x3c00)
 171 #define   HTS2_PRST_SHIFT       10
 172 #define   HTS2_PRST_UNLOADED    0
 173 #define   HTS2_PRST_RUNNING     1
 174 #define   HTS2_PRST_TDISOP      2 /* turbo disabled due to power */
 175 #define   HTS2_PRST_TDISHT      3 /* turbo disabled due to high temp */
 176 #define   HTS2_PRST_TDISUSR     4 /* user disabled turbo */
 177 #define   HTS2_PRST_TDISPLAT    5 /* platform disabled turbo */
 178 #define   HTS2_PRST_TDISPM      6 /* power management disabled turbo */
 179 #define   HTS2_PRST_TDISERR     7 /* some kind of error disabled turbo */
 180 #define THM_PTL         0x56
     /* THM_MGTV is read as a 64-bit value by read_mgtv(); TV_MASK/TV_SHIFT select its field. */
 181 #define THM_MGTV        0x58
 182 #define   TV_MASK       0x000000000000ff00
 183 #define   TV_SHIFT      8
 184 #define THM_PTV         0x60
 185 #define   PTV_MASK      0x00ff
     /* THM_MMGPC, THM_MPPC and THM_MPCPC are read as limits by update_turbo_limits(). */
 186 #define THM_MMGPC       0x64
 187 #define THM_MPPC        0x66
 188 #define THM_MPCPC       0x68
 189 #define THM_TSPIEN      0x82
 190 #define   TSPIEN_AUX_LOHI       (1<<0)
 191 #define   TSPIEN_HOT_LOHI       (1<<1)
 192 #define   TSPIEN_CRIT_LOHI      (1<<2)
 193 #define   TSPIEN_AUX2_LOHI      (1<<3)
 194 #define THM_TSLOCK      0x83
 195 #define THM_ATR         0x84
 196 #define THM_TOF         0x87
 197 #define THM_STS         0x98
 198 #define   STS_PCPL_MASK         (0x7fe00000)
 199 #define   STS_PCPL_SHIFT        21
 200 #define   STS_GPL_MASK          (0x001ff000)
 201 #define   STS_GPL_SHIFT         12
 202 #define   STS_PP_MASK           (0x00000c00)
 203 #define   STS_PP_SHIFT          10
 204 #define   STS_PP_DEF            0
 205 #define   STS_PP_PROC           1
 206 #define   STS_PP_BAL            2
 207 #define   STS_PP_GFX            3
 208 #define   STS_PCTD_DIS          (1<<9)
 209 #define   STS_GTD_DIS           (1<<8)
 210 #define   STS_PTL_MASK          (0x000000fe)
 211 #define   STS_PTL_SHIFT         1
 212 #define   STS_NVV               (1<<0)
 213 #define THM_SEC         0x9c
 214 #define   SEC_ACK       (1<<0)
 215 #define THM_TC3         0xa4
 216 #define THM_TC1         0xa8
     /*
      * NOTE(review): STS_PPL_MASK covers bits 8-17 while STS_PPL_SHIFT is 16,
      * and both sit under THM_TC1 rather than THM_STS - confirm against the
      * hardware spec before using them.
      */
 217 #define   STS_PPL_MASK          (0x0003ff00)
 218 #define   STS_PPL_SHIFT         16
 219 #define THM_TC2         0xac
 220 #define THM_DTV         0xb0
 221 #define THM_ITV         0xd8
 222 #define   ITV_ME_SEQNO_MASK 0x00ff0000 /* ME should update every ~200ms */
 223 #define   ITV_ME_SEQNO_SHIFT (16)
 224 #define   ITV_MCH_TEMP_MASK 0x0000ff00
 225 #define   ITV_MCH_TEMP_SHIFT (8)
 226 #define   ITV_PCH_TEMP_MASK 0x000000ff
 227 
     /*
      * MMIO accessors for the thermal device registers.  Each one offsets
      * ips->regmap by @off, so a variable named "ips" (pointing at the
      * struct ips_driver) must be in scope wherever they are used.
      */
 228 #define thm_readb(off) readb(ips->regmap + (off))
 229 #define thm_readw(off) readw(ips->regmap + (off))
 230 #define thm_readl(off) readl(ips->regmap + (off))
 231 #define thm_readq(off) readq(ips->regmap + (off))
 232 
     /* Write counterparts; note the (offset, value) argument order. */
 233 #define thm_writeb(off, val) writeb((val), ips->regmap + (off))
 234 #define thm_writew(off, val) writew((val), ips->regmap + (off))
 235 #define thm_writel(off, val) writel((val), ips->regmap + (off))
 236 
     /* Sleep time between passes of the ips_adjust() loop. */
 237 static const int IPS_ADJUST_PERIOD = 5000; /* ms */
 238 static bool late_i915_load = false;
 239 
 240 /* For initial average collection */
 241 static const int IPS_SAMPLE_PERIOD = 200; /* ms */
 242 static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
     /* Samples per window: 5000 / 200 = 25; sizes the arrays averaged by calc_avg_temp()/calc_avg_power(). */
 243 #define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
 244 
 245 /* Per-SKU limits */
     /*
      * Default limits for one SKU class.  verify_limits() falls back to these
      * when the register-provided limits are out of range, and cpu_exceeded()/
      * mch_exceeded() compare the running average temperatures against the
      * *_temp_limit fields (scaled by 100).
      */
 246 struct ips_mcp_limits {
 247         int mcp_power_limit; /* mW units */
 248         int core_power_limit;
 249         int mch_power_limit;
 250         int core_temp_limit; /* degrees C */
 251         int mch_temp_limit;
 252 };
 253 
 254 /* Max temps are -10 degrees C to avoid PROCHOT# */
 255 
     /* NOTE(review): "sv" presumably means standard-voltage parts - confirm. */
 256 static struct ips_mcp_limits ips_sv_limits = {
 257         .mcp_power_limit = 35000,
 258         .core_power_limit = 29000,
 259         .mch_power_limit = 20000,
 260         .core_temp_limit = 95,
 261         .mch_temp_limit = 90
 262 };
 263 
     /* NOTE(review): "lv" presumably means low-voltage parts - confirm. */
 264 static struct ips_mcp_limits ips_lv_limits = {
 265         .mcp_power_limit = 25000,
 266         .core_power_limit = 21000,
 267         .mch_power_limit = 13000,
 268         .core_temp_limit = 95,
 269         .mch_temp_limit = 90
 270 };
 271 
     /* NOTE(review): "ulv" presumably means ultra-low-voltage parts - confirm. */
 272 static struct ips_mcp_limits ips_ulv_limits = {
 273         .mcp_power_limit = 18000,
 274         .core_power_limit = 14000,
 275         .mch_power_limit = 11000,
 276         .core_temp_limit = 95,
 277         .mch_temp_limit = 90
 278 };
 279 
     /*
      * Per-device driver state: register mapping, worker threads, running
      * temperature/power averages, current limits and turbo flags, plus the
      * optional callbacks into the graphics driver.
      */
 280 struct ips_driver {
 281         struct device *dev;
 282         void __iomem *regmap; /* base used by the thm_read*()/thm_write*() accessors */
 283         int irq;
 284 
 285         struct task_struct *monitor; /* woken by monitor_timeout() */
 286         struct task_struct *adjust;
 287         struct dentry *debug_root;
 288         struct timer_list timer; /* its callback is monitor_timeout() */
 289 
 290         /* Average CPU core temps (all averages in .01 degrees C for precision) */
 291         u16 ctv1_avg_temp;
 292         u16 ctv2_avg_temp;
 293         /* GMCH average */
 294         u16 mch_avg_temp;
 295         /* Average for the CPU (both cores?) */
 296         u16 mcp_avg_temp;
 297         /* Average power consumption (in mW) */
 298         u32 cpu_avg_power;
 299         u32 mch_avg_power;
 300 
 301         /* Offset values */
 302         u16 cta_val;
 303         u16 pta_val;
 304         u16 mgta_val;
 305 
 306         /* Maximums & prefs, protected by turbo status lock */
 307         spinlock_t turbo_status_lock;
 308         u16 mcp_temp_limit;  /* from THM_PTL, sanitized by verify_limits() */
 309         u16 mcp_power_limit; /* from THM_MPPC, sanitized by verify_limits() */
 310         u16 core_power_limit; /* from THM_MPCPC; cpu_exceeded() scales it by 100 against mW */
 311         u16 mch_power_limit; /* from THM_MMGPC */
 312         bool cpu_turbo_enabled; /* policy: turbo may be used (currently forced false in update_turbo_limits()) */
 313         bool __cpu_turbo_on;    /* state last applied by ips_{enable,disable}_cpu_turbo() */
 314         bool gpu_turbo_enabled; /* policy flag; cleared when a gpu_raise/gpu_lower callback fails */
 315         bool __gpu_turbo_on;    /* state last applied by ips_{enable,disable}_gpu_turbo() */
 316         bool gpu_preferred;
 317         bool poll_turbo_status; /* when set, ips_adjust() re-reads limits on every pass */
 318         bool second_cpu;
 319         bool turbo_toggle_allowed; /* gates the per-CPU IA32_PERF_CTL writes */
 320         struct ips_mcp_limits *limits; /* per-SKU defaults */
 321 
 322         /* Optional MCH interfaces for if i915 is in use */
 323         unsigned long (*read_mch_val)(void);
 324         bool (*gpu_raise)(void);
 325         bool (*gpu_lower)(void);
 326         bool (*gpu_busy)(void);
 327         bool (*gpu_turbo_disable)(void);
 328 
 329         /* For restoration at unload */
 330         u64 orig_turbo_limit; /* its TDP field is also the floor used by ips_cpu_lower() */
 331         u64 orig_turbo_ratios;
 332 };
 333 
 334 static bool
 335 ips_gpu_turbo_enabled(struct ips_driver *ips);
 336 
 337 /**
 338  * ips_cpu_busy - is CPU busy?
 339  * @ips: IPS driver struct
 340  *
 341  * Check CPU for load to see whether we should increase its thermal budget.
 342  *
 343  * RETURNS:
 344  * True if the CPU could use more power, false otherwise.
 345  */
 346 static bool ips_cpu_busy(struct ips_driver *ips)
 347 {
 348         if ((avenrun[0] >> FSHIFT) > 1)
 349                 return true;
 350 
 351         return false;
 352 }
 353 
 354 /**
 355  * ips_cpu_raise - raise CPU power clamp
 356  * @ips: IPS driver struct
 357  *
 358  * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
 359  * this platform.
 360  *
 361  * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
 362  * long as we haven't hit the TDP limit for the SKU).
 363  */
 364 static void ips_cpu_raise(struct ips_driver *ips)
 365 {
 366         u64 turbo_override;
 367         u16 cur_tdp_limit, new_tdp_limit;
 368 
 369         if (!ips->cpu_turbo_enabled)
 370                 return;
 371 
 372         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 373 
 374         cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
 375         new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
 376 
 377         /* Clamp to SKU TDP limit */
 378         if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
 379                 new_tdp_limit = cur_tdp_limit;
 380 
 381         thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
 382 
 383         turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
 384         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 385 
 386         turbo_override &= ~TURBO_TDP_MASK;
 387         turbo_override |= new_tdp_limit;
 388 
 389         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 390 }
 391 
 392 /**
 393  * ips_cpu_lower - lower CPU power clamp
 394  * @ips: IPS driver struct
 395  *
 396  * Lower CPU power clamp b %IPS_CPU_STEP if possible.
 397  *
 398  * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
 399  * as low as the platform limits will allow (though we could go lower there
 400  * wouldn't be much point).
 401  */
 402 static void ips_cpu_lower(struct ips_driver *ips)
 403 {
 404         u64 turbo_override;
 405         u16 cur_limit, new_limit;
 406 
 407         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 408 
 409         cur_limit = turbo_override & TURBO_TDP_MASK;
 410         new_limit = cur_limit - 8; /* 1W decrease */
 411 
 412         /* Clamp to SKU TDP limit */
 413         if (new_limit  < (ips->orig_turbo_limit & TURBO_TDP_MASK))
 414                 new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
 415 
 416         thm_writew(THM_MPCPC, (new_limit * 10) / 8);
 417 
 418         turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
 419         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 420 
 421         turbo_override &= ~TURBO_TDP_MASK;
 422         turbo_override |= new_limit;
 423 
 424         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 425 }
 426 
 427 /**
 428  * do_enable_cpu_turbo - internal turbo enable function
 429  * @data: unused
 430  *
 431  * Internal function for actually updating MSRs.  When we enable/disable
 432  * turbo, we need to do it on each CPU; this function is the one called
 433  * by on_each_cpu() when needed.
 434  */
 435 static void do_enable_cpu_turbo(void *data)
 436 {
 437         u64 perf_ctl;
 438 
 439         rdmsrl(IA32_PERF_CTL, perf_ctl);
 440         if (perf_ctl & IA32_PERF_TURBO_DIS) {
 441                 perf_ctl &= ~IA32_PERF_TURBO_DIS;
 442                 wrmsrl(IA32_PERF_CTL, perf_ctl);
 443         }
 444 }
 445 
 446 /**
 447  * ips_enable_cpu_turbo - enable turbo mode on all CPUs
 448  * @ips: IPS driver struct
 449  *
 450  * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
 451  * all logical threads.
 452  */
 453 static void ips_enable_cpu_turbo(struct ips_driver *ips)
 454 {
 455         /* Already on, no need to mess with MSRs */
 456         if (ips->__cpu_turbo_on)
 457                 return;
 458 
 459         if (ips->turbo_toggle_allowed)
 460                 on_each_cpu(do_enable_cpu_turbo, ips, 1);
 461 
 462         ips->__cpu_turbo_on = true;
 463 }
 464 
 465 /**
 466  * do_disable_cpu_turbo - internal turbo disable function
 467  * @data: unused
 468  *
 469  * Internal function for actually updating MSRs.  When we enable/disable
 470  * turbo, we need to do it on each CPU; this function is the one called
 471  * by on_each_cpu() when needed.
 472  */
 473 static void do_disable_cpu_turbo(void *data)
 474 {
 475         u64 perf_ctl;
 476 
 477         rdmsrl(IA32_PERF_CTL, perf_ctl);
 478         if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
 479                 perf_ctl |= IA32_PERF_TURBO_DIS;
 480                 wrmsrl(IA32_PERF_CTL, perf_ctl);
 481         }
 482 }
 483 
 484 /**
 485  * ips_disable_cpu_turbo - disable turbo mode on all CPUs
 486  * @ips: IPS driver struct
 487  *
 488  * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
 489  * all logical threads.
 490  */
 491 static void ips_disable_cpu_turbo(struct ips_driver *ips)
 492 {
 493         /* Already off, leave it */
 494         if (!ips->__cpu_turbo_on)
 495                 return;
 496 
 497         if (ips->turbo_toggle_allowed)
 498                 on_each_cpu(do_disable_cpu_turbo, ips, 1);
 499 
 500         ips->__cpu_turbo_on = false;
 501 }
 502 
 503 /**
 504  * ips_gpu_busy - is GPU busy?
 505  * @ips: IPS driver struct
 506  *
 507  * Check GPU for load to see whether we should increase its thermal budget.
 508  * We need to call into the i915 driver in this case.
 509  *
 510  * RETURNS:
 511  * True if the GPU could use more power, false otherwise.
 512  */
 513 static bool ips_gpu_busy(struct ips_driver *ips)
 514 {
 515         if (!ips_gpu_turbo_enabled(ips))
 516                 return false;
 517 
 518         return ips->gpu_busy();
 519 }
 520 
 521 /**
 522  * ips_gpu_raise - raise GPU power clamp
 523  * @ips: IPS driver struct
 524  *
 525  * Raise the GPU frequency/power if possible.  We need to call into the
 526  * i915 driver in this case.
 527  */
 528 static void ips_gpu_raise(struct ips_driver *ips)
 529 {
 530         if (!ips_gpu_turbo_enabled(ips))
 531                 return;
 532 
 533         if (!ips->gpu_raise())
 534                 ips->gpu_turbo_enabled = false;
 535 
 536         return;
 537 }
 538 
 539 /**
 540  * ips_gpu_lower - lower GPU power clamp
 541  * @ips: IPS driver struct
 542  *
 543  * Lower GPU frequency/power if possible.  Need to call i915.
 544  */
 545 static void ips_gpu_lower(struct ips_driver *ips)
 546 {
 547         if (!ips_gpu_turbo_enabled(ips))
 548                 return;
 549 
 550         if (!ips->gpu_lower())
 551                 ips->gpu_turbo_enabled = false;
 552 
 553         return;
 554 }
 555 
 556 /**
 557  * ips_enable_gpu_turbo - notify the gfx driver turbo is available
 558  * @ips: IPS driver struct
 559  *
 560  * Call into the graphics driver indicating that it can safely use
 561  * turbo mode.
 562  */
 563 static void ips_enable_gpu_turbo(struct ips_driver *ips)
 564 {
 565         if (ips->__gpu_turbo_on)
 566                 return;
 567         ips->__gpu_turbo_on = true;
 568 }
 569 
 570 /**
 571  * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
 572  * @ips: IPS driver struct
 573  *
 574  * Request that the graphics driver disable turbo mode.
 575  */
 576 static void ips_disable_gpu_turbo(struct ips_driver *ips)
 577 {
 578         /* Avoid calling i915 if turbo is already disabled */
 579         if (!ips->__gpu_turbo_on)
 580                 return;
 581 
 582         if (!ips->gpu_turbo_disable())
 583                 dev_err(ips->dev, "failed to disable graphics turbo\n");
 584         else
 585                 ips->__gpu_turbo_on = false;
 586 }
 587 
 588 /**
 589  * mcp_exceeded - check whether we're outside our thermal & power limits
 590  * @ips: IPS driver struct
 591  *
 592  * Check whether the MCP is over its thermal or power budget.
 593  */
 594 static bool mcp_exceeded(struct ips_driver *ips)
 595 {
 596         unsigned long flags;
 597         bool ret = false;
 598         u32 temp_limit;
 599         u32 avg_power;
 600 
 601         spin_lock_irqsave(&ips->turbo_status_lock, flags);
 602 
 603         temp_limit = ips->mcp_temp_limit * 100;
 604         if (ips->mcp_avg_temp > temp_limit)
 605                 ret = true;
 606 
 607         avg_power = ips->cpu_avg_power + ips->mch_avg_power;
 608         if (avg_power > ips->mcp_power_limit)
 609                 ret = true;
 610 
 611         spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 612 
 613         return ret;
 614 }
 615 
 616 /**
 617  * cpu_exceeded - check whether a CPU core is outside its limits
 618  * @ips: IPS driver struct
 619  * @cpu: CPU number to check
 620  *
 621  * Check a given CPU's average temp or power is over its limit.
 622  */
 623 static bool cpu_exceeded(struct ips_driver *ips, int cpu)
 624 {
 625         unsigned long flags;
 626         int avg;
 627         bool ret = false;
 628 
 629         spin_lock_irqsave(&ips->turbo_status_lock, flags);
 630         avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
 631         if (avg > (ips->limits->core_temp_limit * 100))
 632                 ret = true;
 633         if (ips->cpu_avg_power > ips->core_power_limit * 100)
 634                 ret = true;
 635         spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 636 
 637         if (ret)
 638                 dev_info(ips->dev, "CPU power or thermal limit exceeded\n");
 639 
 640         return ret;
 641 }
 642 
 643 /**
 644  * mch_exceeded - check whether the GPU is over budget
 645  * @ips: IPS driver struct
 646  *
 647  * Check the MCH temp & power against their maximums.
 648  */
 649 static bool mch_exceeded(struct ips_driver *ips)
 650 {
 651         unsigned long flags;
 652         bool ret = false;
 653 
 654         spin_lock_irqsave(&ips->turbo_status_lock, flags);
 655         if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
 656                 ret = true;
 657         if (ips->mch_avg_power > ips->mch_power_limit)
 658                 ret = true;
 659         spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 660 
 661         return ret;
 662 }
 663 
 664 /**
 665  * verify_limits - verify BIOS provided limits
 666  * @ips: IPS structure
 667  *
 668  * BIOS can optionally provide non-default limits for power and temp.  Check
 669  * them here and use the defaults if the BIOS values are not provided or
 670  * are otherwise unusable.
 671  */
 672 static void verify_limits(struct ips_driver *ips)
 673 {
 674         if (ips->mcp_power_limit < ips->limits->mcp_power_limit ||
 675             ips->mcp_power_limit > 35000)
 676                 ips->mcp_power_limit = ips->limits->mcp_power_limit;
 677 
 678         if (ips->mcp_temp_limit < ips->limits->core_temp_limit ||
 679             ips->mcp_temp_limit < ips->limits->mch_temp_limit ||
 680             ips->mcp_temp_limit > 150)
 681                 ips->mcp_temp_limit = min(ips->limits->core_temp_limit,
 682                                           ips->limits->mch_temp_limit);
 683 }
 684 
 685 /**
 686  * update_turbo_limits - get various limits & settings from regs
 687  * @ips: IPS driver struct
 688  *
 689  * Update the IPS power & temp limits, along with turbo enable flags,
 690  * based on latest register contents.
 691  *
 692  * Used at init time and for runtime BIOS support, which requires polling
 693  * the regs for updates (as a result of AC->DC transition for example).
 694  *
 695  * LOCKING:
 696  * Caller must hold turbo_status_lock (outside of init)
 697  */
 698 static void update_turbo_limits(struct ips_driver *ips)
 699 {
 700         u32 hts = thm_readl(THM_HTS);
 701 
 702         ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
 703         /* 
 704          * Disable turbo for now, until we can figure out why the power figures
 705          * are wrong
 706          */
 707         ips->cpu_turbo_enabled = false;
 708 
 709         if (ips->gpu_busy)
 710                 ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
 711 
 712         ips->core_power_limit = thm_readw(THM_MPCPC);
 713         ips->mch_power_limit = thm_readw(THM_MMGPC);
 714         ips->mcp_temp_limit = thm_readw(THM_PTL);
 715         ips->mcp_power_limit = thm_readw(THM_MPPC);
 716 
 717         verify_limits(ips);
 718         /* Ignore BIOS CPU vs GPU pref */
 719 }
 720 
 721 /**
 722  * ips_adjust - adjust power clamp based on thermal state
 723  * @data: ips driver structure
 724  *
 725  * Wake up every 5s or so and check whether we should adjust the power clamp.
 726  * Check CPU and GPU load to determine which needs adjustment.  There are
 727  * several things to consider here:
 728  *   - do we need to adjust up or down?
 729  *   - is CPU busy?
 730  *   - is GPU busy?
 731  *   - is CPU in turbo?
 732  *   - is GPU in turbo?
 733  *   - is CPU or GPU preferred? (CPU is default)
 734  *
 735  * So, given the above, we do the following:
 736  *   - up (TDP available)
 737  *     - CPU not busy, GPU not busy - nothing
 738  *     - CPU busy, GPU not busy - adjust CPU up
 739  *     - CPU not busy, GPU busy - adjust GPU up
 740  *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
 741  *       non-preferred unit if necessary
 742  *   - down (at TDP limit)
 743  *     - adjust both CPU and GPU down if possible
 744  *
 745                 cpu+ gpu+       cpu+gpu-        cpu-gpu+        cpu-gpu-
 746 cpu < gpu <     cpu+gpu+        cpu+            gpu+            nothing
 747 cpu < gpu >=    cpu+gpu-(mcp<)  cpu+gpu-(mcp<)  gpu-            gpu-
 748 cpu >= gpu <    cpu-gpu+(mcp<)  cpu-            cpu-gpu+(mcp<)  cpu-
 749 cpu >= gpu >=   cpu-gpu-        cpu-gpu-        cpu-gpu-        cpu-gpu-
 750  *
 751  */
 752 static int ips_adjust(void *data)
 753 {
 754         struct ips_driver *ips = data;
 755         unsigned long flags;
 756 
 757         dev_dbg(ips->dev, "starting ips-adjust thread\n");
 758 
 759         /*
 760          * Adjust CPU and GPU clamps every 5s if needed.  Doing it more
 761          * often isn't recommended due to ME interaction.
 762          */
 763         do {
 764                 bool cpu_busy = ips_cpu_busy(ips);
 765                 bool gpu_busy = ips_gpu_busy(ips);
 766 
 767                 spin_lock_irqsave(&ips->turbo_status_lock, flags);
 768                 if (ips->poll_turbo_status)
 769                         update_turbo_limits(ips);
 770                 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
 771 
 772                 /* Update turbo status if necessary */
 773                 if (ips->cpu_turbo_enabled)
 774                         ips_enable_cpu_turbo(ips);
 775                 else
 776                         ips_disable_cpu_turbo(ips);
 777 
 778                 if (ips->gpu_turbo_enabled)
 779                         ips_enable_gpu_turbo(ips);
 780                 else
 781                         ips_disable_gpu_turbo(ips);
 782 
 783                 /* We're outside our comfort zone, crank them down */
 784                 if (mcp_exceeded(ips)) {
 785                         ips_cpu_lower(ips);
 786                         ips_gpu_lower(ips);
 787                         goto sleep;
 788                 }
 789 
 790                 if (!cpu_exceeded(ips, 0) && cpu_busy)
 791                         ips_cpu_raise(ips);
 792                 else
 793                         ips_cpu_lower(ips);
 794 
 795                 if (!mch_exceeded(ips) && gpu_busy)
 796                         ips_gpu_raise(ips);
 797                 else
 798                         ips_gpu_lower(ips);
 799 
 800 sleep:
 801                 schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
 802         } while (!kthread_should_stop());
 803 
 804         dev_dbg(ips->dev, "ips-adjust thread stopped\n");
 805 
 806         return 0;
 807 }
 808 
 809 /*
 810  * Helpers for reading out temp/power values and calculating their
 811  * averages for the decision making and monitoring functions.
 812  */
 813 
 814 static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
 815 {
 816         u64 total = 0;
 817         int i;
 818         u16 avg;
 819 
 820         for (i = 0; i < IPS_SAMPLE_COUNT; i++)
 821                 total += (u64)(array[i] * 100);
 822 
 823         do_div(total, IPS_SAMPLE_COUNT);
 824 
 825         avg = (u16)total;
 826 
 827         return avg;
 828 }
 829 
 830 static u16 read_mgtv(struct ips_driver *ips)
 831 {
 832         u16 ret;
 833         u64 slope, offset;
 834         u64 val;
 835 
 836         val = thm_readq(THM_MGTV);
 837         val = (val & TV_MASK) >> TV_SHIFT;
 838 
 839         slope = offset = thm_readw(THM_MGTA);
 840         slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
 841         offset = offset & MGTA_OFFSET_MASK;
 842 
 843         ret = ((val * slope + 0x40) >> 7) + offset;
 844 
 845         return 0; /* MCH temp reporting buggy */
 846 }
 847 
 848 static u16 read_ptv(struct ips_driver *ips)
 849 {
 850         u16 val;
 851 
 852         val = thm_readw(THM_PTV) & PTV_MASK;
 853 
 854         return val;
 855 }
 856 
 857 static u16 read_ctv(struct ips_driver *ips, int cpu)
 858 {
 859         int reg = cpu ? THM_CTV2 : THM_CTV1;
 860         u16 val;
 861 
 862         val = thm_readw(reg);
 863         if (!(val & CTV_TEMP_ERROR))
 864                 val = (val) >> 6; /* discard fractional component */
 865         else
 866                 val = 0;
 867 
 868         return val;
 869 }
 870 
 871 static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
 872 {
 873         u32 val;
 874         u32 ret;
 875 
 876         /*
 877          * CEC is in joules/65535.  Take difference over time to
 878          * get watts.
 879          */
 880         val = thm_readl(THM_CEC);
 881 
 882         /* period is in ms and we want mW */
 883         ret = (((val - *last) * 1000) / period);
 884         ret = (ret * 1000) / 65535;
 885         *last = val;
 886 
 887         return 0;
 888 }
 889 
 890 static const u16 temp_decay_factor = 2;
 891 static u16 update_average_temp(u16 avg, u16 val)
 892 {
 893         u16 ret;
 894 
 895         /* Multiply by 100 for extra precision */
 896         ret = (val * 100 / temp_decay_factor) +
 897                 (((temp_decay_factor - 1) * avg) / temp_decay_factor);
 898         return ret;
 899 }
 900 
 901 static const u16 power_decay_factor = 2;
 902 static u16 update_average_power(u32 avg, u32 val)
 903 {
 904         u32 ret;
 905 
 906         ret = (val / power_decay_factor) +
 907                 (((power_decay_factor - 1) * avg) / power_decay_factor);
 908 
 909         return ret;
 910 }
 911 
 912 static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
 913 {
 914         u64 total = 0;
 915         u32 avg;
 916         int i;
 917 
 918         for (i = 0; i < IPS_SAMPLE_COUNT; i++)
 919                 total += array[i];
 920 
 921         do_div(total, IPS_SAMPLE_COUNT);
 922         avg = (u32)total;
 923 
 924         return avg;
 925 }
 926 
 927 static void monitor_timeout(struct timer_list *t)
 928 {
 929         struct ips_driver *ips = from_timer(ips, t, timer);
 930         wake_up_process(ips->monitor);
 931 }
 932 
 933 /**
 934  * ips_monitor - temp/power monitoring thread
 935  * @data: ips driver structure
 936  *
 937  * This is the main function for the IPS driver.  It monitors power and
 938  * tempurature in the MCP and adjusts CPU and GPU power clams accordingly.
 939  *
 940  * We keep a 5s moving average of power consumption and tempurature.  Using
 941  * that data, along with CPU vs GPU preference, we adjust the power clamps
 942  * up or down.
 943  */
 944 static int ips_monitor(void *data)
 945 {
 946         struct ips_driver *ips = data;
 947         unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
 948         int i;
 949         u32 *cpu_samples, *mchp_samples, old_cpu_power;
 950         u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples;
 951         u8 cur_seqno, last_seqno;
 952 
 953         mcp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
 954         ctv1_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
 955         ctv2_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
 956         mch_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u16), GFP_KERNEL);
 957         cpu_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL);
 958         mchp_samples = kcalloc(IPS_SAMPLE_COUNT, sizeof(u32), GFP_KERNEL);
 959         if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
 960                         !cpu_samples || !mchp_samples) {
 961                 dev_err(ips->dev,
 962                         "failed to allocate sample array, ips disabled\n");
 963                 kfree(mcp_samples);
 964                 kfree(ctv1_samples);
 965                 kfree(ctv2_samples);
 966                 kfree(mch_samples);
 967                 kfree(cpu_samples);
 968                 kfree(mchp_samples);
 969                 return -ENOMEM;
 970         }
 971 
 972         last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
 973                 ITV_ME_SEQNO_SHIFT;
 974         seqno_timestamp = get_jiffies_64();
 975 
 976         old_cpu_power = thm_readl(THM_CEC);
 977         schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
 978 
 979         /* Collect an initial average */
 980         for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
 981                 u32 mchp, cpu_power;
 982                 u16 val;
 983 
 984                 mcp_samples[i] = read_ptv(ips);
 985 
 986                 val = read_ctv(ips, 0);
 987                 ctv1_samples[i] = val;
 988 
 989                 val = read_ctv(ips, 1);
 990                 ctv2_samples[i] = val;
 991 
 992                 val = read_mgtv(ips);
 993                 mch_samples[i] = val;
 994 
 995                 cpu_power = get_cpu_power(ips, &old_cpu_power,
 996                                           IPS_SAMPLE_PERIOD);
 997                 cpu_samples[i] = cpu_power;
 998 
 999                 if (ips->read_mch_val) {
1000                         mchp = ips->read_mch_val();
1001                         mchp_samples[i] = mchp;
1002                 }
1003 
1004                 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1005                 if (kthread_should_stop())
1006                         break;
1007         }
1008 
1009         ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
1010         ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
1011         ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
1012         ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
1013         ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
1014         ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
1015         kfree(mcp_samples);
1016         kfree(ctv1_samples);
1017         kfree(ctv2_samples);
1018         kfree(mch_samples);
1019         kfree(cpu_samples);
1020         kfree(mchp_samples);
1021 
1022         /* Start the adjustment thread now that we have data */
1023         wake_up_process(ips->adjust);
1024 
1025         /*
1026          * Ok, now we have an initial avg.  From here on out, we track the
1027          * running avg using a decaying average calculation.  This allows
1028          * us to reduce the sample frequency if the CPU and GPU are idle.
1029          */
1030         old_cpu_power = thm_readl(THM_CEC);
1031         schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1032         last_sample_period = IPS_SAMPLE_PERIOD;
1033 
1034         timer_setup(&ips->timer, monitor_timeout, TIMER_DEFERRABLE);
1035         do {
1036                 u32 cpu_val, mch_val;
1037                 u16 val;
1038 
1039                 /* MCP itself */
1040                 val = read_ptv(ips);
1041                 ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
1042 
1043                 /* Processor 0 */
1044                 val = read_ctv(ips, 0);
1045                 ips->ctv1_avg_temp =
1046                         update_average_temp(ips->ctv1_avg_temp, val);
1047                 /* Power */
1048                 cpu_val = get_cpu_power(ips, &old_cpu_power,
1049                                         last_sample_period);
1050                 ips->cpu_avg_power =
1051                         update_average_power(ips->cpu_avg_power, cpu_val);
1052 
1053                 if (ips->second_cpu) {
1054                         /* Processor 1 */
1055                         val = read_ctv(ips, 1);
1056                         ips->ctv2_avg_temp =
1057                                 update_average_temp(ips->ctv2_avg_temp, val);
1058                 }
1059 
1060                 /* MCH */
1061                 val = read_mgtv(ips);
1062                 ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
1063                 /* Power */
1064                 if (ips->read_mch_val) {
1065                         mch_val = ips->read_mch_val();
1066                         ips->mch_avg_power =
1067                                 update_average_power(ips->mch_avg_power,
1068                                                      mch_val);
1069                 }
1070 
1071                 /*
1072                  * Make sure ME is updating thermal regs.
1073                  * Note:
1074                  * If it's been more than a second since the last update,
1075                  * the ME is probably hung.
1076                  */
1077                 cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
1078                         ITV_ME_SEQNO_SHIFT;
1079                 if (cur_seqno == last_seqno &&
1080                     time_after(jiffies, seqno_timestamp + HZ)) {
1081                         dev_warn(ips->dev,
1082                                  "ME failed to update for more than 1s, likely hung\n");
1083                 } else {
1084                         seqno_timestamp = get_jiffies_64();
1085                         last_seqno = cur_seqno;
1086                 }
1087 
1088                 last_msecs = jiffies_to_msecs(jiffies);
1089                 expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
1090 
1091                 __set_current_state(TASK_INTERRUPTIBLE);
1092                 mod_timer(&ips->timer, expire);
1093                 schedule();
1094 
1095                 /* Calculate actual sample period for power averaging */
1096                 last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
1097                 if (!last_sample_period)
1098                         last_sample_period = 1;
1099         } while (!kthread_should_stop());
1100 
1101         del_timer_sync(&ips->timer);
1102 
1103         dev_dbg(ips->dev, "ips-monitor thread stopped\n");
1104 
1105         return 0;
1106 }
1107 
#if 0
/*
 * Debug-only register dump helpers (compiled out).  Each macro reads one
 * thermal register of the given width and prints it with dev_dbg(); they
 * rely on a local 'ips' being in scope.
 */
#define THM_DUMPW(reg) \
        do { \
                u16 val = thm_readw(reg); \
                dev_dbg(ips->dev, #reg ": 0x%04x\n", val); \
        } while (0)
#define THM_DUMPL(reg) \
        do { \
                u32 val = thm_readl(reg); \
                dev_dbg(ips->dev, #reg ": 0x%08x\n", val); \
        } while (0)
/* u64 needs the 'll' length modifier */
#define THM_DUMPQ(reg) \
        do { \
                u64 val = thm_readq(reg); \
                dev_dbg(ips->dev, #reg ": 0x%016llx\n", val); \
        } while (0)

/* Print the processor temp limit and a handful of thermal registers. */
static void dump_thermal_info(struct ips_driver *ips)
{
        u16 ptl;

        ptl = thm_readw(THM_PTL);
        dev_dbg(ips->dev, "Processor temp limit: %d\n", ptl);

        THM_DUMPW(THM_CTA);
        THM_DUMPW(THM_TRC);
        THM_DUMPW(THM_CTV1);
        THM_DUMPL(THM_STS);
        THM_DUMPW(THM_PTV);
        THM_DUMPQ(THM_MGTV);
}
#endif
1140 
1141 /**
1142  * ips_irq_handler - handle temperature triggers and other IPS events
1143  * @irq: irq number
1144  * @arg: unused
1145  *
1146  * Handle temperature limit trigger events, generally by lowering the clamps.
1147  * If we're at a critical limit, we clamp back to the lowest possible value
1148  * to prevent emergency shutdown.
1149  */
1150 static irqreturn_t ips_irq_handler(int irq, void *arg)
1151 {
1152         struct ips_driver *ips = arg;
1153         u8 tses = thm_readb(THM_TSES);
1154         u8 tes = thm_readb(THM_TES);
1155 
1156         if (!tses && !tes)
1157                 return IRQ_NONE;
1158 
1159         dev_info(ips->dev, "TSES: 0x%02x\n", tses);
1160         dev_info(ips->dev, "TES: 0x%02x\n", tes);
1161 
1162         /* STS update from EC? */
1163         if (tes & 1) {
1164                 u32 sts, tc1;
1165 
1166                 sts = thm_readl(THM_STS);
1167                 tc1 = thm_readl(THM_TC1);
1168 
1169                 if (sts & STS_NVV) {
1170                         spin_lock(&ips->turbo_status_lock);
1171                         ips->core_power_limit = (sts & STS_PCPL_MASK) >>
1172                                 STS_PCPL_SHIFT;
1173                         ips->mch_power_limit = (sts & STS_GPL_MASK) >>
1174                                 STS_GPL_SHIFT;
1175                         /* ignore EC CPU vs GPU pref */
1176                         ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
1177                         /* 
1178                          * Disable turbo for now, until we can figure
1179                          * out why the power figures are wrong
1180                          */
1181                         ips->cpu_turbo_enabled = false;
1182                         if (ips->gpu_busy)
1183                                 ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
1184                         ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
1185                                 STS_PTL_SHIFT;
1186                         ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
1187                                 STS_PPL_SHIFT;
1188                         verify_limits(ips);
1189                         spin_unlock(&ips->turbo_status_lock);
1190 
1191                         thm_writeb(THM_SEC, SEC_ACK);
1192                 }
1193                 thm_writeb(THM_TES, tes);
1194         }
1195 
1196         /* Thermal trip */
1197         if (tses) {
1198                 dev_warn(ips->dev, "thermal trip occurred, tses: 0x%04x\n",
1199                          tses);
1200                 thm_writeb(THM_TSES, tses);
1201         }
1202 
1203         return IRQ_HANDLED;
1204 }
1205 
1206 #ifndef CONFIG_DEBUG_FS
/* No-op stubs used when debugfs support is not configured */
static void ips_debugfs_init(struct ips_driver *ips)
{
}

static void ips_debugfs_cleanup(struct ips_driver *ips)
{
}
1209 #else
1210 
1211 /* Expose current state and limits in debugfs if possible */
1212 
1213 static int cpu_temp_show(struct seq_file *m, void *data)
1214 {
1215         struct ips_driver *ips = m->private;
1216 
1217         seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
1218                    ips->ctv1_avg_temp % 100);
1219 
1220         return 0;
1221 }
1222 DEFINE_SHOW_ATTRIBUTE(cpu_temp);
1223 
1224 static int cpu_power_show(struct seq_file *m, void *data)
1225 {
1226         struct ips_driver *ips = m->private;
1227 
1228         seq_printf(m, "%dmW\n", ips->cpu_avg_power);
1229 
1230         return 0;
1231 }
1232 DEFINE_SHOW_ATTRIBUTE(cpu_power);
1233 
1234 static int cpu_clamp_show(struct seq_file *m, void *data)
1235 {
1236         u64 turbo_override;
1237         int tdp, tdc;
1238 
1239         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1240 
1241         tdp = (int)(turbo_override & TURBO_TDP_MASK);
1242         tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1243 
1244         /* Convert to .1W/A units */
1245         tdp = tdp * 10 / 8;
1246         tdc = tdc * 10 / 8;
1247 
1248         /* Watts Amperes */
1249         seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1250                    tdc / 10, tdc % 10);
1251 
1252         return 0;
1253 }
1254 DEFINE_SHOW_ATTRIBUTE(cpu_clamp);
1255 
1256 static int mch_temp_show(struct seq_file *m, void *data)
1257 {
1258         struct ips_driver *ips = m->private;
1259 
1260         seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
1261                    ips->mch_avg_temp % 100);
1262 
1263         return 0;
1264 }
1265 DEFINE_SHOW_ATTRIBUTE(mch_temp);
1266 
1267 static int mch_power_show(struct seq_file *m, void *data)
1268 {
1269         struct ips_driver *ips = m->private;
1270 
1271         seq_printf(m, "%dmW\n", ips->mch_avg_power);
1272 
1273         return 0;
1274 }
1275 DEFINE_SHOW_ATTRIBUTE(mch_power);
1276 
1277 static void ips_debugfs_cleanup(struct ips_driver *ips)
1278 {
1279         debugfs_remove_recursive(ips->debug_root);
1280 }
1281 
1282 static void ips_debugfs_init(struct ips_driver *ips)
1283 {
1284         ips->debug_root = debugfs_create_dir("ips", NULL);
1285 
1286         debugfs_create_file("cpu_temp", 0444, ips->debug_root, ips, &cpu_temp_fops);
1287         debugfs_create_file("cpu_power", 0444, ips->debug_root, ips, &cpu_power_fops);
1288         debugfs_create_file("cpu_clamp", 0444, ips->debug_root, ips, &cpu_clamp_fops);
1289         debugfs_create_file("mch_temp", 0444, ips->debug_root, ips, &mch_temp_fops);
1290         debugfs_create_file("mch_power", 0444, ips->debug_root, ips, &mch_power_fops);
1291 }
1292 #endif /* CONFIG_DEBUG_FS */
1293 
1294 /**
1295  * ips_detect_cpu - detect whether CPU supports IPS
1296  *
1297  * Walk our list and see if we're on a supported CPU.  If we find one,
1298  * return the limits for it.
1299  */
1300 static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1301 {
1302         u64 turbo_power, misc_en;
1303         struct ips_mcp_limits *limits = NULL;
1304         u16 tdp;
1305 
1306         if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1307                 dev_info(ips->dev, "Non-IPS CPU detected.\n");
1308                 return NULL;
1309         }
1310 
1311         rdmsrl(IA32_MISC_ENABLE, misc_en);
1312         /*
1313          * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1314          * turbo manually or we'll get an illegal MSR access, even though
1315          * turbo will still be available.
1316          */
1317         if (misc_en & IA32_MISC_TURBO_EN)
1318                 ips->turbo_toggle_allowed = true;
1319         else
1320                 ips->turbo_toggle_allowed = false;
1321 
1322         if (strstr(boot_cpu_data.x86_model_id, "CPU       M"))
1323                 limits = &ips_sv_limits;
1324         else if (strstr(boot_cpu_data.x86_model_id, "CPU       L"))
1325                 limits = &ips_lv_limits;
1326         else if (strstr(boot_cpu_data.x86_model_id, "CPU       U"))
1327                 limits = &ips_ulv_limits;
1328         else {
1329                 dev_info(ips->dev, "No CPUID match found.\n");
1330                 return NULL;
1331         }
1332 
1333         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1334         tdp = turbo_power & TURBO_TDP_MASK;
1335 
1336         /* Sanity check TDP against CPU */
1337         if (limits->core_power_limit != (tdp / 8) * 1000) {
1338                 dev_info(ips->dev,
1339                          "CPU TDP doesn't match expected value (found %d, expected %d)\n",
1340                          tdp / 8, limits->core_power_limit / 1000);
1341                 limits->core_power_limit = (tdp / 8) * 1000;
1342         }
1343 
1344         return limits;
1345 }
1346 
1347 /**
1348  * ips_get_i915_syms - try to get GPU control methods from i915 driver
1349  * @ips: IPS driver
1350  *
1351  * The i915 driver exports several interfaces to allow the IPS driver to
1352  * monitor and control graphics turbo mode.  If we can find them, we can
1353  * enable graphics turbo, otherwise we must disable it to avoid exceeding
1354  * thermal and power limits in the MCP.
1355  */
1356 static bool ips_get_i915_syms(struct ips_driver *ips)
1357 {
1358         ips->read_mch_val = symbol_get(i915_read_mch_val);
1359         if (!ips->read_mch_val)
1360                 goto out_err;
1361         ips->gpu_raise = symbol_get(i915_gpu_raise);
1362         if (!ips->gpu_raise)
1363                 goto out_put_mch;
1364         ips->gpu_lower = symbol_get(i915_gpu_lower);
1365         if (!ips->gpu_lower)
1366                 goto out_put_raise;
1367         ips->gpu_busy = symbol_get(i915_gpu_busy);
1368         if (!ips->gpu_busy)
1369                 goto out_put_lower;
1370         ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
1371         if (!ips->gpu_turbo_disable)
1372                 goto out_put_busy;
1373 
1374         return true;
1375 
1376 out_put_busy:
1377         symbol_put(i915_gpu_busy);
1378 out_put_lower:
1379         symbol_put(i915_gpu_lower);
1380 out_put_raise:
1381         symbol_put(i915_gpu_raise);
1382 out_put_mch:
1383         symbol_put(i915_read_mch_val);
1384 out_err:
1385         return false;
1386 }
1387 
1388 static bool
1389 ips_gpu_turbo_enabled(struct ips_driver *ips)
1390 {
1391         if (!ips->gpu_busy && late_i915_load) {
1392                 if (ips_get_i915_syms(ips)) {
1393                         dev_info(ips->dev,
1394                                  "i915 driver attached, reenabling gpu turbo\n");
1395                         ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
1396                 }
1397         }
1398 
1399         return ips->gpu_turbo_enabled;
1400 }
1401 
1402 void
1403 ips_link_to_i915_driver(void)
1404 {
1405         /* We can't cleanly get at the various ips_driver structs from
1406          * this caller (the i915 driver), so just set a flag saying
1407          * that it's time to try getting the symbols again.
1408          */
1409         late_i915_load = true;
1410 }
1411 EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
1412 
1413 static const struct pci_device_id ips_id_table[] = {
1414         { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
1415         { 0, }
1416 };
1417 
1418 MODULE_DEVICE_TABLE(pci, ips_id_table);
1419 
1420 static int ips_blacklist_callback(const struct dmi_system_id *id)
1421 {
1422         pr_info("Blacklisted intel_ips for %s\n", id->ident);
1423         return 1;
1424 }
1425 
1426 static const struct dmi_system_id ips_blacklist[] = {
1427         {
1428                 .callback = ips_blacklist_callback,
1429                 .ident = "HP ProBook",
1430                 .matches = {
1431                         DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
1432                         DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook"),
1433                 },
1434         },
1435         { }     /* terminating entry */
1436 };
1437 
1438 static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
1439 {
1440         u64 platform_info;
1441         struct ips_driver *ips;
1442         u32 hts;
1443         int ret = 0;
1444         u16 htshi, trc, trc_required_mask;
1445         u8 tse;
1446 
1447         if (dmi_check_system(ips_blacklist))
1448                 return -ENODEV;
1449 
1450         ips = devm_kzalloc(&dev->dev, sizeof(*ips), GFP_KERNEL);
1451         if (!ips)
1452                 return -ENOMEM;
1453 
1454         spin_lock_init(&ips->turbo_status_lock);
1455         ips->dev = &dev->dev;
1456 
1457         ips->limits = ips_detect_cpu(ips);
1458         if (!ips->limits) {
1459                 dev_info(&dev->dev, "IPS not supported on this CPU\n");
1460                 return -ENXIO;
1461         }
1462 
1463         ret = pcim_enable_device(dev);
1464         if (ret) {
1465                 dev_err(&dev->dev, "can't enable PCI device, aborting\n");
1466                 return ret;
1467         }
1468 
1469         ret = pcim_iomap_regions(dev, 1 << 0, pci_name(dev));
1470         if (ret) {
1471                 dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
1472                 return ret;
1473         }
1474         ips->regmap = pcim_iomap_table(dev)[0];
1475 
1476         pci_set_drvdata(dev, ips);
1477 
1478         tse = thm_readb(THM_TSE);
1479         if (tse != TSE_EN) {
1480                 dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
1481                 return -ENXIO;
1482         }
1483 
1484         trc = thm_readw(THM_TRC);
1485         trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
1486         if ((trc & trc_required_mask) != trc_required_mask) {
1487                 dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
1488                 return -ENXIO;
1489         }
1490 
1491         if (trc & TRC_CORE2_EN)
1492                 ips->second_cpu = true;
1493 
1494         update_turbo_limits(ips);
1495         dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
1496                 ips->mcp_power_limit / 10);
1497         dev_dbg(&dev->dev, "max core power clamp: %dW\n",
1498                 ips->core_power_limit / 10);
1499         /* BIOS may update limits at runtime */
1500         if (thm_readl(THM_PSC) & PSP_PBRT)
1501                 ips->poll_turbo_status = true;
1502 
1503         if (!ips_get_i915_syms(ips)) {
1504                 dev_info(&dev->dev, "failed to get i915 symbols, graphics turbo disabled until i915 loads\n");
1505                 ips->gpu_turbo_enabled = false;
1506         } else {
1507                 dev_dbg(&dev->dev, "graphics turbo enabled\n");
1508                 ips->gpu_turbo_enabled = true;
1509         }
1510 
1511         /*
1512          * Check PLATFORM_INFO MSR to make sure this chip is
1513          * turbo capable.
1514          */
1515         rdmsrl(PLATFORM_INFO, platform_info);
1516         if (!(platform_info & PLATFORM_TDP)) {
1517                 dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
1518                 return -ENODEV;
1519         }
1520 
1521         /*
1522          * IRQ handler for ME interaction
1523          * Note: don't use MSI here as the PCH has bugs.
1524          */
1525         ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
1526         if (ret < 0)
1527                 return ret;
1528 
1529         ips->irq = pci_irq_vector(dev, 0);
1530 
1531         ret = request_irq(ips->irq, ips_irq_handler, IRQF_SHARED, "ips", ips);
1532         if (ret) {
1533                 dev_err(&dev->dev, "request irq failed, aborting\n");
1534                 return ret;
1535         }
1536 
1537         /* Enable aux, hot & critical interrupts */
1538         thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
1539                    TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
1540         thm_writeb(THM_TEN, TEN_UPDATE_EN);
1541 
1542         /* Collect adjustment values */
1543         ips->cta_val = thm_readw(THM_CTA);
1544         ips->pta_val = thm_readw(THM_PTA);
1545         ips->mgta_val = thm_readw(THM_MGTA);
1546 
1547         /* Save turbo limits & ratios */
1548         rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1549 
1550         ips_disable_cpu_turbo(ips);
1551         ips->cpu_turbo_enabled = false;
1552 
1553         /* Create thermal adjust thread */
1554         ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
1555         if (IS_ERR(ips->adjust)) {
1556                 dev_err(&dev->dev,
1557                         "failed to create thermal adjust thread, aborting\n");
1558                 ret = -ENOMEM;
1559                 goto error_free_irq;
1560 
1561         }
1562 
1563         /*
1564          * Set up the work queue and monitor thread. The monitor thread
1565          * will wake up ips_adjust thread.
1566          */
1567         ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
1568         if (IS_ERR(ips->monitor)) {
1569                 dev_err(&dev->dev,
1570                         "failed to create thermal monitor thread, aborting\n");
1571                 ret = -ENOMEM;
1572                 goto error_thread_cleanup;
1573         }
1574 
1575         hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
1576                 (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
1577         htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
1578 
1579         thm_writew(THM_HTSHI, htshi);
1580         thm_writel(THM_HTS, hts);
1581 
1582         ips_debugfs_init(ips);
1583 
1584         dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
1585                  ips->mcp_temp_limit);
1586         return ret;
1587 
1588 error_thread_cleanup:
1589         kthread_stop(ips->adjust);
1590 error_free_irq:
1591         free_irq(ips->irq, ips);
1592         pci_free_irq_vectors(dev);
1593         return ret;
1594 }
1595 
1596 static void ips_remove(struct pci_dev *dev)
1597 {
1598         struct ips_driver *ips = pci_get_drvdata(dev);
1599         u64 turbo_override;
1600 
1601         ips_debugfs_cleanup(ips);
1602 
1603         /* Release i915 driver */
1604         if (ips->read_mch_val)
1605                 symbol_put(i915_read_mch_val);
1606         if (ips->gpu_raise)
1607                 symbol_put(i915_gpu_raise);
1608         if (ips->gpu_lower)
1609                 symbol_put(i915_gpu_lower);
1610         if (ips->gpu_busy)
1611                 symbol_put(i915_gpu_busy);
1612         if (ips->gpu_turbo_disable)
1613                 symbol_put(i915_gpu_turbo_disable);
1614 
1615         rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1616         turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
1617         wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1618         wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1619 
1620         free_irq(ips->irq, ips);
1621         pci_free_irq_vectors(dev);
1622         if (ips->adjust)
1623                 kthread_stop(ips->adjust);
1624         if (ips->monitor)
1625                 kthread_stop(ips->monitor);
1626         dev_dbg(&dev->dev, "IPS driver removed\n");
1627 }
1628 
1629 static struct pci_driver ips_pci_driver = {
1630         .name = "intel ips",
1631         .id_table = ips_id_table,
1632         .probe = ips_probe,
1633         .remove = ips_remove,
1634 };
1635 
1636 module_pci_driver(ips_pci_driver);
1637 
1638 MODULE_LICENSE("GPL v2");
1639 MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
1640 MODULE_DESCRIPTION("Intelligent Power Sharing Driver");

/* [<][>][^][v][top][bottom][index][help] */