1/* bbc_envctrl.c: UltraSPARC-III environment control driver.
2 *
3 * Copyright (C) 2001, 2008 David S. Miller (davem@davemloft.net)
4 */
5
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/reboot.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_device.h>
#include <asm/oplib.h>
14
15#include "bbc_i2c.h"
16#include "max1617.h"
17
18#undef ENVCTRL_TRACE
19
20/* WARNING: Making changes to this driver is very dangerous.
21 *          If you misprogram the sensor chips they can
22 *          cut the power on you instantly.
23 */
24
25/* Two temperature sensors exist in the SunBLADE-1000 enclosure.
26 * Both are implemented using max1617 i2c devices.  Each max1617
27 * monitors 2 temperatures, one for one of the cpu dies and the other
28 * for the ambient temperature.
29 *
30 * The max1617 is capable of being programmed with power-off
31 * temperature values, one low limit and one high limit.  These
32 * can be controlled independently for the cpu or ambient temperature.
33 * If a limit is violated, the power is simply shut off.  The frequency
34 * with which the max1617 does temperature sampling can be controlled
35 * as well.
36 *
37 * Three fans exist inside the machine, all three are controlled with
38 * an i2c digital to analog converter.  There is a fan directed at the
39 * two processor slots, another for the rest of the enclosure, and the
40 * third is for the power supply.  The first two fans may be speed
41 * controlled by changing the voltage fed to them.  The third fan may
42 * only be completely off or on.  The third fan is meant to only be
43 * disabled/enabled when entering/exiting the lowest power-saving
44 * mode of the machine.
45 *
46 * An environmental control kernel thread periodically monitors all
47 * temperature sensors.  Based upon the samples it will adjust the
48 * fan speeds to try and keep the system within a certain temperature
49 * range (the goal being to make the fans as quiet as possible without
50 * allowing the system to get too hot).
51 *
52 * If the temperature begins to rise/fall outside of the acceptable
53 * operating range, a periodic warning will be sent to the kernel log.
54 * The fans will be put on full blast to attempt to deal with this
55 * situation.  After exceeding the acceptable operating range by a
56 * certain threshold, the kernel thread will shut down the system.
57 * Here, the thread is attempting to shut the machine down cleanly
58 * before the hardware based power-off event is triggered.
59 */
60
61/* These settings are in Celsius.  We use these defaults only
62 * if we cannot interrogate the cpu-fru SEEPROM.
63 */
64struct temp_limits {
65	s8 high_pwroff, high_shutdown, high_warn;
66	s8 low_warn, low_shutdown, low_pwroff;
67};
68
69static struct temp_limits cpu_temp_limits[2] = {
70	{ 100, 85, 80, 5, -5, -10 },
71	{ 100, 85, 80, 5, -5, -10 },
72};
73
74static struct temp_limits amb_temp_limits[2] = {
75	{ 65, 55, 40, 5, -5, -10 },
76	{ 65, 55, 40, 5, -5, -10 },
77};
78
/* Every attached temperature sensor and fan controller is linked
 * here through its glob_list member, in addition to the per-bus list.
 */
static LIST_HEAD(all_temps);
static LIST_HEAD(all_fans);

/* Offsets handed to bbc_i2c_writeb() to select which fan is programmed. */
#define CPU_FAN_REG	0xf0
#define SYS_FAN_REG	0xf2
#define PSUPPLY_FAN_REG	0xf4

/* The CPU and system fan speed values are clamped to this range. */
#define FAN_SPEED_MIN	0x0c
#define FAN_SPEED_MAX	0x3f

/* Values written for the power supply fan, which is only on or off. */
#define PSUPPLY_FAN_ON	0x1f
#define PSUPPLY_FAN_OFF	0x00
91
92static void set_fan_speeds(struct bbc_fan_control *fp)
93{
94	/* Put temperatures into range so we don't mis-program
95	 * the hardware.
96	 */
97	if (fp->cpu_fan_speed < FAN_SPEED_MIN)
98		fp->cpu_fan_speed = FAN_SPEED_MIN;
99	if (fp->cpu_fan_speed > FAN_SPEED_MAX)
100		fp->cpu_fan_speed = FAN_SPEED_MAX;
101	if (fp->system_fan_speed < FAN_SPEED_MIN)
102		fp->system_fan_speed = FAN_SPEED_MIN;
103	if (fp->system_fan_speed > FAN_SPEED_MAX)
104		fp->system_fan_speed = FAN_SPEED_MAX;
105#ifdef ENVCTRL_TRACE
106	printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
107	       fp->index,
108	       fp->cpu_fan_speed, fp->system_fan_speed);
109#endif
110
111	bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
112	bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
113	bbc_i2c_writeb(fp->client,
114		       (fp->psupply_fan_on ?
115			PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
116		       PSUPPLY_FAN_REG);
117}
118
119static void get_current_temps(struct bbc_cpu_temperature *tp)
120{
121	tp->prev_amb_temp = tp->curr_amb_temp;
122	bbc_i2c_readb(tp->client,
123		      (unsigned char *) &tp->curr_amb_temp,
124		      MAX1617_AMB_TEMP);
125	tp->prev_cpu_temp = tp->curr_cpu_temp;
126	bbc_i2c_readb(tp->client,
127		      (unsigned char *) &tp->curr_cpu_temp,
128		      MAX1617_CPU_TEMP);
129#ifdef ENVCTRL_TRACE
130	printk("temp%d: cpu(%d C) amb(%d C)\n",
131	       tp->index,
132	       (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
133#endif
134}
135
136
137static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
138{
139	static int shutting_down = 0;
140	char *type = "???";
141	s8 val = -1;
142
143	if (shutting_down != 0)
144		return;
145
146	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
147	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
148		type = "ambient";
149		val = tp->curr_amb_temp;
150	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
151		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
152		type = "CPU";
153		val = tp->curr_cpu_temp;
154	}
155
156	printk(KERN_CRIT "temp%d: Outside of safe %s "
157	       "operating temperature, %d C.\n",
158	       tp->index, type, val);
159
160	printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
161
162	shutting_down = 1;
163	orderly_poweroff(true);
164}
165
166#define WARN_INTERVAL	(30 * HZ)
167
168static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
169{
170	int ret = 0;
171
172	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
173		if (tp->curr_amb_temp >=
174		    amb_temp_limits[tp->index].high_warn) {
175			printk(KERN_WARNING "temp%d: "
176			       "Above safe ambient operating temperature, %d C.\n",
177			       tp->index, (int) tp->curr_amb_temp);
178			ret = 1;
179		} else if (tp->curr_amb_temp <
180			   amb_temp_limits[tp->index].low_warn) {
181			printk(KERN_WARNING "temp%d: "
182			       "Below safe ambient operating temperature, %d C.\n",
183			       tp->index, (int) tp->curr_amb_temp);
184			ret = 1;
185		}
186		if (ret)
187			*last_warn = jiffies;
188	} else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
189		   tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
190		ret = 1;
191
192	/* Now check the shutdown limits. */
193	if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
194	    tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
195		do_envctrl_shutdown(tp);
196		ret = 1;
197	}
198
199	if (ret) {
200		tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
201	} else if ((tick & (8 - 1)) == 0) {
202		s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
203		s8 amb_goal_lo;
204
205		amb_goal_lo = amb_goal_hi - 3;
206
207		/* We do not try to avoid 'too cold' events.  Basically we
208		 * only try to deal with over-heating and fan noise reduction.
209		 */
210		if (tp->avg_amb_temp < amb_goal_hi) {
211			if (tp->avg_amb_temp >= amb_goal_lo)
212				tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
213			else
214				tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
215		} else {
216			tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
217		}
218	} else {
219		tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
220	}
221}
222
223static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
224{
225	int ret = 0;
226
227	if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
228		if (tp->curr_cpu_temp >=
229		    cpu_temp_limits[tp->index].high_warn) {
230			printk(KERN_WARNING "temp%d: "
231			       "Above safe CPU operating temperature, %d C.\n",
232			       tp->index, (int) tp->curr_cpu_temp);
233			ret = 1;
234		} else if (tp->curr_cpu_temp <
235			   cpu_temp_limits[tp->index].low_warn) {
236			printk(KERN_WARNING "temp%d: "
237			       "Below safe CPU operating temperature, %d C.\n",
238			       tp->index, (int) tp->curr_cpu_temp);
239			ret = 1;
240		}
241		if (ret)
242			*last_warn = jiffies;
243	} else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
244		   tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
245		ret = 1;
246
247	/* Now check the shutdown limits. */
248	if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
249	    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
250		do_envctrl_shutdown(tp);
251		ret = 1;
252	}
253
254	if (ret) {
255		tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
256	} else if ((tick & (8 - 1)) == 0) {
257		s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
258		s8 cpu_goal_lo;
259
260		cpu_goal_lo = cpu_goal_hi - 3;
261
262		/* We do not try to avoid 'too cold' events.  Basically we
263		 * only try to deal with over-heating and fan noise reduction.
264		 */
265		if (tp->avg_cpu_temp < cpu_goal_hi) {
266			if (tp->avg_cpu_temp >= cpu_goal_lo)
267				tp->fan_todo[FAN_CPU] = FAN_SAME;
268			else
269				tp->fan_todo[FAN_CPU] = FAN_SLOWER;
270		} else {
271			tp->fan_todo[FAN_CPU] = FAN_FASTER;
272		}
273	} else {
274		tp->fan_todo[FAN_CPU] = FAN_SAME;
275	}
276}
277
278static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
279{
280	tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
281	tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
282
283	analyze_ambient_temp(tp, last_warn, tp->sample_tick);
284	analyze_cpu_temp(tp, last_warn, tp->sample_tick);
285
286	tp->sample_tick++;
287}
288
289static enum fan_action prioritize_fan_action(int which_fan)
290{
291	struct bbc_cpu_temperature *tp;
292	enum fan_action decision = FAN_STATE_MAX;
293
294	/* Basically, prioritize what the temperature sensors
295	 * recommend we do, and perform that action on all the
296	 * fans.
297	 */
298	list_for_each_entry(tp, &all_temps, glob_list) {
299		if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
300			decision = FAN_FULLBLAST;
301			break;
302		}
303		if (tp->fan_todo[which_fan] == FAN_SAME &&
304		    decision != FAN_FASTER)
305			decision = FAN_SAME;
306		else if (tp->fan_todo[which_fan] == FAN_FASTER)
307			decision = FAN_FASTER;
308		else if (decision != FAN_FASTER &&
309			 decision != FAN_SAME &&
310			 tp->fan_todo[which_fan] == FAN_SLOWER)
311			decision = FAN_SLOWER;
312	}
313	if (decision == FAN_STATE_MAX)
314		decision = FAN_SAME;
315
316	return decision;
317}
318
319static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
320{
321	enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
322	int ret;
323
324	if (decision == FAN_SAME)
325		return 0;
326
327	ret = 1;
328	if (decision == FAN_FULLBLAST) {
329		if (fp->system_fan_speed >= FAN_SPEED_MAX)
330			ret = 0;
331		else
332			fp->system_fan_speed = FAN_SPEED_MAX;
333	} else {
334		if (decision == FAN_FASTER) {
335			if (fp->system_fan_speed >= FAN_SPEED_MAX)
336				ret = 0;
337			else
338				fp->system_fan_speed += 2;
339		} else {
340			int orig_speed = fp->system_fan_speed;
341
342			if (orig_speed <= FAN_SPEED_MIN ||
343			    orig_speed <= (fp->cpu_fan_speed - 3))
344				ret = 0;
345			else
346				fp->system_fan_speed -= 1;
347		}
348	}
349
350	return ret;
351}
352
353static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
354{
355	enum fan_action decision = prioritize_fan_action(FAN_CPU);
356	int ret;
357
358	if (decision == FAN_SAME)
359		return 0;
360
361	ret = 1;
362	if (decision == FAN_FULLBLAST) {
363		if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
364			ret = 0;
365		else
366			fp->cpu_fan_speed = FAN_SPEED_MAX;
367	} else {
368		if (decision == FAN_FASTER) {
369			if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
370				ret = 0;
371			else {
372				fp->cpu_fan_speed += 2;
373				if (fp->system_fan_speed <
374				    (fp->cpu_fan_speed - 3))
375					fp->system_fan_speed =
376						fp->cpu_fan_speed - 3;
377			}
378		} else {
379			if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
380				ret = 0;
381			else
382				fp->cpu_fan_speed -= 1;
383		}
384	}
385
386	return ret;
387}
388
/* Recompute both fan speeds for one controller and program the
 * hardware only if at least one of them changed.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
	/* Both helpers must run, ambient first, so no short-circuit. */
	int sys_changed = maybe_new_ambient_fan_speed(fp);
	int cpu_changed = maybe_new_cpu_fan_speed(fp);

	if (sys_changed || cpu_changed)
		set_fan_speeds(fp);
}
399
400static void fans_full_blast(void)
401{
402	struct bbc_fan_control *fp;
403
404	/* Since we will not be monitoring things anymore, put
405	 * the fans on full blast.
406	 */
407	list_for_each_entry(fp, &all_fans, glob_list) {
408		fp->cpu_fan_speed = FAN_SPEED_MAX;
409		fp->system_fan_speed = FAN_SPEED_MAX;
410		fp->psupply_fan_on = 1;
411		set_fan_speeds(fp);
412	}
413}
414
415#define POLL_INTERVAL	(5 * 1000)
416static unsigned long last_warning_jiffies;
417static struct task_struct *kenvctrld_task;
418
419static int kenvctrld(void *__unused)
420{
421	printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
422	last_warning_jiffies = jiffies - WARN_INTERVAL;
423	for (;;) {
424		struct bbc_cpu_temperature *tp;
425		struct bbc_fan_control *fp;
426
427		msleep_interruptible(POLL_INTERVAL);
428		if (kthread_should_stop())
429			break;
430
431		list_for_each_entry(tp, &all_temps, glob_list) {
432			get_current_temps(tp);
433			analyze_temps(tp, &last_warning_jiffies);
434		}
435		list_for_each_entry(fp, &all_fans, glob_list)
436			maybe_new_fan_speeds(fp);
437	}
438	printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
439
440	fans_full_blast();
441
442	return 0;
443}
444
445static void attach_one_temp(struct bbc_i2c_bus *bp, struct platform_device *op,
446			    int temp_idx)
447{
448	struct bbc_cpu_temperature *tp;
449
450	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
451	if (!tp)
452		return;
453
454	INIT_LIST_HEAD(&tp->bp_list);
455	INIT_LIST_HEAD(&tp->glob_list);
456
457	tp->client = bbc_i2c_attach(bp, op);
458	if (!tp->client) {
459		kfree(tp);
460		return;
461	}
462
463
464	tp->index = temp_idx;
465
466	list_add(&tp->glob_list, &all_temps);
467	list_add(&tp->bp_list, &bp->temps);
468
469	/* Tell it to convert once every 5 seconds, clear all cfg
470	 * bits.
471	 */
472	bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
473	bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
474
475	/* Program the hard temperature limits into the chip. */
476	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
477		       MAX1617_WR_AMB_HIGHLIM);
478	bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
479		       MAX1617_WR_AMB_LOWLIM);
480	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
481		       MAX1617_WR_CPU_HIGHLIM);
482	bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
483		       MAX1617_WR_CPU_LOWLIM);
484
485	get_current_temps(tp);
486	tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
487	tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
488
489	tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
490	tp->fan_todo[FAN_CPU] = FAN_SAME;
491}
492
493static void attach_one_fan(struct bbc_i2c_bus *bp, struct platform_device *op,
494			   int fan_idx)
495{
496	struct bbc_fan_control *fp;
497
498	fp = kzalloc(sizeof(*fp), GFP_KERNEL);
499	if (!fp)
500		return;
501
502	INIT_LIST_HEAD(&fp->bp_list);
503	INIT_LIST_HEAD(&fp->glob_list);
504
505	fp->client = bbc_i2c_attach(bp, op);
506	if (!fp->client) {
507		kfree(fp);
508		return;
509	}
510
511	fp->index = fan_idx;
512
513	list_add(&fp->glob_list, &all_fans);
514	list_add(&fp->bp_list, &bp->fans);
515
516	/* The i2c device controlling the fans is write-only.
517	 * So the only way to keep track of the current power
518	 * level fed to the fans is via software.  Choose half
519	 * power for cpu/system and 'on' fo the powersupply fan
520	 * and set it now.
521	 */
522	fp->psupply_fan_on = 1;
523	fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
524	fp->cpu_fan_speed += FAN_SPEED_MIN;
525	fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
526	fp->system_fan_speed += FAN_SPEED_MIN;
527
528	set_fan_speeds(fp);
529}
530
531static void destroy_one_temp(struct bbc_cpu_temperature *tp)
532{
533	bbc_i2c_detach(tp->client);
534	kfree(tp);
535}
536
537static void destroy_all_temps(struct bbc_i2c_bus *bp)
538{
539	struct bbc_cpu_temperature *tp, *tpos;
540
541	list_for_each_entry_safe(tp, tpos, &bp->temps, bp_list) {
542		list_del(&tp->bp_list);
543		list_del(&tp->glob_list);
544		destroy_one_temp(tp);
545	}
546}
547
548static void destroy_one_fan(struct bbc_fan_control *fp)
549{
550	bbc_i2c_detach(fp->client);
551	kfree(fp);
552}
553
554static void destroy_all_fans(struct bbc_i2c_bus *bp)
555{
556	struct bbc_fan_control *fp, *fpos;
557
558	list_for_each_entry_safe(fp, fpos, &bp->fans, bp_list) {
559		list_del(&fp->bp_list);
560		list_del(&fp->glob_list);
561		destroy_one_fan(fp);
562	}
563}
564
565int bbc_envctrl_init(struct bbc_i2c_bus *bp)
566{
567	struct platform_device *op;
568	int temp_index = 0;
569	int fan_index = 0;
570	int devidx = 0;
571
572	while ((op = bbc_i2c_getdev(bp, devidx++)) != NULL) {
573		if (!strcmp(op->dev.of_node->name, "temperature"))
574			attach_one_temp(bp, op, temp_index++);
575		if (!strcmp(op->dev.of_node->name, "fan-control"))
576			attach_one_fan(bp, op, fan_index++);
577	}
578	if (temp_index != 0 && fan_index != 0) {
579		kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
580		if (IS_ERR(kenvctrld_task)) {
581			int err = PTR_ERR(kenvctrld_task);
582
583			kenvctrld_task = NULL;
584			destroy_all_temps(bp);
585			destroy_all_fans(bp);
586			return err;
587		}
588	}
589
590	return 0;
591}
592
593void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp)
594{
595	if (kenvctrld_task)
596		kthread_stop(kenvctrld_task);
597
598	destroy_all_temps(bp);
599	destroy_all_fans(bp);
600}
601