1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35/*
36 * This file contains support for diagnostic functions.  It is accessed by
37 * opening the qib_diag device, normally minor number 129.  Diagnostic use
38 * of the QLogic_IB chip may render the chip or board unusable until the
39 * driver is unloaded, or in some cases, until the system is rebooted.
40 *
41 * Accesses to the chip through this interface are not similar to going
42 * through the /sys/bus/pci resource mmap interface.
43 */
44
45#include <linux/io.h>
46#include <linux/pci.h>
47#include <linux/poll.h>
48#include <linux/vmalloc.h>
49#include <linux/export.h>
50#include <linux/fs.h>
51#include <linux/uaccess.h>
52
53#include "qib.h"
54#include "qib_common.h"
55
56#undef pr_fmt
57#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
58
/*
 * Each client that opens the diag device must read then write
 * offset 0, to prevent lossage from random cat or od. diag_state
 * sequences this "handshake".
 *
 * UNUSED: the client struct is parked on client_pool (set by
 *         return_client()).
 * OPENED: set by get_client(); writes are rejected and reads must be
 *         8 bytes at offset 0.
 * INIT:   entered when a read succeeds in the OPENED state; reads and
 *         writes must still be 8 bytes at offset 0.
 * READY:  entered when a write succeeds in the INIT state; the offset-0
 *         restriction no longer applies.
 */
enum diag_state { UNUSED = 0, OPENED, INIT, READY };
65
/*
 * State for an individual client. PID so children cannot abuse handshake.
 * client_pool is the head of the list of recycled client structs.
 */
static struct qib_diag_client {
	/* Link in either dd->diag_client (open) or client_pool (recycled) */
	struct qib_diag_client *next;
	/* Device the client opened; cleared to NULL by return_client() */
	struct qib_devdata *dd;
	/* current->pid at open time; read/write from another pid get -EPERM */
	pid_t pid;
	/* Progress through the read-then-write handshake */
	enum diag_state state;
} *client_pool;
73
74/*
75 * Get a client struct. Recycled if possible, else kmalloc.
76 * Must be called with qib_mutex held
77 */
78static struct qib_diag_client *get_client(struct qib_devdata *dd)
79{
80	struct qib_diag_client *dc;
81
82	dc = client_pool;
83	if (dc)
84		/* got from pool remove it and use */
85		client_pool = dc->next;
86	else
87		/* None in pool, alloc and init */
88		dc = kmalloc(sizeof(*dc), GFP_KERNEL);
89
90	if (dc) {
91		dc->next = NULL;
92		dc->dd = dd;
93		dc->pid = current->pid;
94		dc->state = OPENED;
95	}
96	return dc;
97}
98
99/*
100 * Return to pool. Must be called with qib_mutex held
101 */
102static void return_client(struct qib_diag_client *dc)
103{
104	struct qib_devdata *dd = dc->dd;
105	struct qib_diag_client *tdc, *rdc;
106
107	rdc = NULL;
108	if (dc == dd->diag_client) {
109		dd->diag_client = dc->next;
110		rdc = dc;
111	} else {
112		tdc = dc->dd->diag_client;
113		while (tdc) {
114			if (dc == tdc->next) {
115				tdc->next = dc->next;
116				rdc = dc;
117				break;
118			}
119			tdc = tdc->next;
120		}
121	}
122	if (rdc) {
123		rdc->state = UNUSED;
124		rdc->dd = NULL;
125		rdc->pid = 0;
126		rdc->next = client_pool;
127		client_pool = rdc;
128	}
129}
130
/* Handlers for the per-unit diag device; defined later in this file. */
static int qib_diag_open(struct inode *in, struct file *fp);
static int qib_diag_release(struct inode *in, struct file *fp);
static ssize_t qib_diag_read(struct file *fp, char __user *data,
			     size_t count, loff_t *off);
static ssize_t qib_diag_write(struct file *fp, const char __user *data,
			      size_t count, loff_t *off);

/*
 * Operations table handed to qib_cdev_init() by qib_diag_add() for each
 * "ipath_diag<unit>" device.
 */
static const struct file_operations diag_file_ops = {
	.owner = THIS_MODULE,
	.write = qib_diag_write,
	.read = qib_diag_read,
	.open = qib_diag_open,
	.release = qib_diag_release,
	.llseek = default_llseek,
};
146
/*
 * The "ipath_diagpkt" device is shared by all units.  diagpkt_count is
 * incremented by qib_diag_add() and decremented by qib_diag_remove(); the
 * device is created when the count becomes 1 and cleaned up when it drops
 * back to 0.
 */
static atomic_t diagpkt_count = ATOMIC_INIT(0);
static struct cdev *diagpkt_cdev;
static struct device *diagpkt_device;

/* Write handler for the diagpkt device; defined later in this file. */
static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data,
				 size_t count, loff_t *off);

/* The diagpkt device is write-only: no read, open or release handler. */
static const struct file_operations diagpkt_file_ops = {
	.owner = THIS_MODULE,
	.write = qib_diagpkt_write,
	.llseek = noop_llseek,
};
159
160int qib_diag_add(struct qib_devdata *dd)
161{
162	char name[16];
163	int ret = 0;
164
165	if (atomic_inc_return(&diagpkt_count) == 1) {
166		ret = qib_cdev_init(QIB_DIAGPKT_MINOR, "ipath_diagpkt",
167				    &diagpkt_file_ops, &diagpkt_cdev,
168				    &diagpkt_device);
169		if (ret)
170			goto done;
171	}
172
173	snprintf(name, sizeof(name), "ipath_diag%d", dd->unit);
174	ret = qib_cdev_init(QIB_DIAG_MINOR_BASE + dd->unit, name,
175			    &diag_file_ops, &dd->diag_cdev,
176			    &dd->diag_device);
177done:
178	return ret;
179}
180
181static void qib_unregister_observers(struct qib_devdata *dd);
182
183void qib_diag_remove(struct qib_devdata *dd)
184{
185	struct qib_diag_client *dc;
186
187	if (atomic_dec_and_test(&diagpkt_count))
188		qib_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
189
190	qib_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
191
192	/*
193	 * Return all diag_clients of this device. There should be none,
194	 * as we are "guaranteed" that no clients are still open
195	 */
196	while (dd->diag_client)
197		return_client(dd->diag_client);
198
199	/* Now clean up all unused client structs */
200	while (client_pool) {
201		dc = client_pool;
202		client_pool = dc->next;
203		kfree(dc);
204	}
205	/* Clean up observer list */
206	qib_unregister_observers(dd);
207}
208
209/* qib_remap_ioaddr32 - remap an offset into chip address space to __iomem *
210 *
211 * @dd: the qlogic_ib device
212 * @offs: the offset in chip-space
213 * @cntp: Pointer to max (byte) count for transfer starting at offset
214 * This returns a u32 __iomem * so it can be used for both 64 and 32-bit
215 * mapping. It is needed because with the use of PAT for control of
216 * write-combining, the logically contiguous address-space of the chip
217 * may be split into virtually non-contiguous spaces, with different
218 * attributes, which are them mapped to contiguous physical space
219 * based from the first BAR.
220 *
221 * The code below makes the same assumptions as were made in
222 * init_chip_wc_pat() (qib_init.c), copied here:
223 * Assumes chip address space looks like:
224 *		- kregs + sregs + cregs + uregs (in any order)
225 *		- piobufs (2K and 4K bufs in either order)
226 *	or:
227 *		- kregs + sregs + cregs (in any order)
228 *		- piobufs (2K and 4K bufs in either order)
229 *		- uregs
230 *
231 * If cntp is non-NULL, returns how many bytes from offset can be accessed
232 * Returns 0 if the offset is not mapped.
233 */
234static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset,
235				       u32 *cntp)
236{
237	u32 kreglen;
238	u32 snd_bottom, snd_lim = 0;
239	u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase;
240	u32 __iomem *map = NULL;
241	u32 cnt = 0;
242	u32 tot4k, offs4k;
243
244	/* First, simplest case, offset is within the first map. */
245	kreglen = (dd->kregend - dd->kregbase) * sizeof(u64);
246	if (offset < kreglen) {
247		map = krb32 + (offset / sizeof(u32));
248		cnt = kreglen - offset;
249		goto mapped;
250	}
251
252	/*
253	 * Next check for user regs, the next most common case,
254	 * and a cheap check because if they are not in the first map
255	 * they are last in chip.
256	 */
257	if (dd->userbase) {
258		/* If user regs mapped, they are after send, so set limit. */
259		u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase;
260
261		if (!dd->piovl15base)
262			snd_lim = dd->uregbase;
263		krb32 = (u32 __iomem *)dd->userbase;
264		if (offset >= dd->uregbase && offset < ulim) {
265			map = krb32 + (offset - dd->uregbase) / sizeof(u32);
266			cnt = ulim - offset;
267			goto mapped;
268		}
269	}
270
271	/*
272	 * Lastly, check for offset within Send Buffers.
273	 * This is gnarly because struct devdata is deliberately vague
274	 * about things like 7322 VL15 buffers, and we are not in
275	 * chip-specific code here, so should not make many assumptions.
276	 * The one we _do_ make is that the only chip that has more sndbufs
277	 * than we admit is the 7322, and it has userregs above that, so
278	 * we know the snd_lim.
279	 */
280	/* Assume 2K buffers are first. */
281	snd_bottom = dd->pio2k_bufbase;
282	if (snd_lim == 0) {
283		u32 tot2k = dd->piobcnt2k * ALIGN(dd->piosize2k, dd->palign);
284
285		snd_lim = snd_bottom + tot2k;
286	}
287	/* If 4k buffers exist, account for them by bumping
288	 * appropriate limit.
289	 */
290	tot4k = dd->piobcnt4k * dd->align4k;
291	offs4k = dd->piobufbase >> 32;
292	if (dd->piobcnt4k) {
293		if (snd_bottom > offs4k)
294			snd_bottom = offs4k;
295		else {
296			/* 4k above 2k. Bump snd_lim, if needed*/
297			if (!dd->userbase || dd->piovl15base)
298				snd_lim = offs4k + tot4k;
299		}
300	}
301	/*
302	 * Judgement call: can we ignore the space between SendBuffs and
303	 * UserRegs, where we would like to see vl15 buffs, but not more?
304	 */
305	if (offset >= snd_bottom && offset < snd_lim) {
306		offset -= snd_bottom;
307		map = (u32 __iomem *)dd->piobase + (offset / sizeof(u32));
308		cnt = snd_lim - offset;
309	}
310
311	if (!map && offs4k && dd->piovl15base) {
312		snd_lim = offs4k + tot4k + 2 * dd->align4k;
313		if (offset >= (offs4k + tot4k) && offset < snd_lim) {
314			map = (u32 __iomem *)dd->piovl15base +
315				((offset - (offs4k + tot4k)) / sizeof(u32));
316			cnt = snd_lim - offset;
317		}
318	}
319
320mapped:
321	if (cntp)
322		*cntp = cnt;
323	return map;
324}
325
326/*
327 * qib_read_umem64 - read a 64-bit quantity from the chip into user space
328 * @dd: the qlogic_ib device
329 * @uaddr: the location to store the data in user memory
330 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
331 * @count: number of bytes to copy (multiple of 32 bits)
332 *
333 * This function also localizes all chip memory accesses.
334 * The copy should be written such that we read full cacheline packets
335 * from the chip.  This is usually used for a single qword
336 *
337 * NOTE:  This assumes the chip address is 64-bit aligned.
338 */
339static int qib_read_umem64(struct qib_devdata *dd, void __user *uaddr,
340			   u32 regoffs, size_t count)
341{
342	const u64 __iomem *reg_addr;
343	const u64 __iomem *reg_end;
344	u32 limit;
345	int ret;
346
347	reg_addr = (const u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
348	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
349		ret = -EINVAL;
350		goto bail;
351	}
352	if (count >= limit)
353		count = limit;
354	reg_end = reg_addr + (count / sizeof(u64));
355
356	/* not very efficient, but it works for now */
357	while (reg_addr < reg_end) {
358		u64 data = readq(reg_addr);
359
360		if (copy_to_user(uaddr, &data, sizeof(u64))) {
361			ret = -EFAULT;
362			goto bail;
363		}
364		reg_addr++;
365		uaddr += sizeof(u64);
366	}
367	ret = 0;
368bail:
369	return ret;
370}
371
372/*
373 * qib_write_umem64 - write a 64-bit quantity to the chip from user space
374 * @dd: the qlogic_ib device
375 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
376 * @uaddr: the source of the data in user memory
377 * @count: the number of bytes to copy (multiple of 32 bits)
378 *
379 * This is usually used for a single qword
380 * NOTE:  This assumes the chip address is 64-bit aligned.
381 */
382
383static int qib_write_umem64(struct qib_devdata *dd, u32 regoffs,
384			    const void __user *uaddr, size_t count)
385{
386	u64 __iomem *reg_addr;
387	const u64 __iomem *reg_end;
388	u32 limit;
389	int ret;
390
391	reg_addr = (u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
392	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
393		ret = -EINVAL;
394		goto bail;
395	}
396	if (count >= limit)
397		count = limit;
398	reg_end = reg_addr + (count / sizeof(u64));
399
400	/* not very efficient, but it works for now */
401	while (reg_addr < reg_end) {
402		u64 data;
403
404		if (copy_from_user(&data, uaddr, sizeof(data))) {
405			ret = -EFAULT;
406			goto bail;
407		}
408		writeq(data, reg_addr);
409
410		reg_addr++;
411		uaddr += sizeof(u64);
412	}
413	ret = 0;
414bail:
415	return ret;
416}
417
418/*
419 * qib_read_umem32 - read a 32-bit quantity from the chip into user space
420 * @dd: the qlogic_ib device
421 * @uaddr: the location to store the data in user memory
422 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
423 * @count: number of bytes to copy
424 *
425 * read 32 bit values, not 64 bit; for memories that only
426 * support 32 bit reads; usually a single dword.
427 */
428static int qib_read_umem32(struct qib_devdata *dd, void __user *uaddr,
429			   u32 regoffs, size_t count)
430{
431	const u32 __iomem *reg_addr;
432	const u32 __iomem *reg_end;
433	u32 limit;
434	int ret;
435
436	reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
437	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
438		ret = -EINVAL;
439		goto bail;
440	}
441	if (count >= limit)
442		count = limit;
443	reg_end = reg_addr + (count / sizeof(u32));
444
445	/* not very efficient, but it works for now */
446	while (reg_addr < reg_end) {
447		u32 data = readl(reg_addr);
448
449		if (copy_to_user(uaddr, &data, sizeof(data))) {
450			ret = -EFAULT;
451			goto bail;
452		}
453
454		reg_addr++;
455		uaddr += sizeof(u32);
456
457	}
458	ret = 0;
459bail:
460	return ret;
461}
462
463/*
464 * qib_write_umem32 - write a 32-bit quantity to the chip from user space
465 * @dd: the qlogic_ib device
466 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
467 * @uaddr: the source of the data in user memory
468 * @count: number of bytes to copy
469 *
470 * write 32 bit values, not 64 bit; for memories that only
471 * support 32 bit write; usually a single dword.
472 */
473
474static int qib_write_umem32(struct qib_devdata *dd, u32 regoffs,
475			    const void __user *uaddr, size_t count)
476{
477	u32 __iomem *reg_addr;
478	const u32 __iomem *reg_end;
479	u32 limit;
480	int ret;
481
482	reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
483	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
484		ret = -EINVAL;
485		goto bail;
486	}
487	if (count >= limit)
488		count = limit;
489	reg_end = reg_addr + (count / sizeof(u32));
490
491	while (reg_addr < reg_end) {
492		u32 data;
493
494		if (copy_from_user(&data, uaddr, sizeof(data))) {
495			ret = -EFAULT;
496			goto bail;
497		}
498		writel(data, reg_addr);
499
500		reg_addr++;
501		uaddr += sizeof(u32);
502	}
503	ret = 0;
504bail:
505	return ret;
506}
507
508static int qib_diag_open(struct inode *in, struct file *fp)
509{
510	int unit = iminor(in) - QIB_DIAG_MINOR_BASE;
511	struct qib_devdata *dd;
512	struct qib_diag_client *dc;
513	int ret;
514
515	mutex_lock(&qib_mutex);
516
517	dd = qib_lookup(unit);
518
519	if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
520	    !dd->kregbase) {
521		ret = -ENODEV;
522		goto bail;
523	}
524
525	dc = get_client(dd);
526	if (!dc) {
527		ret = -ENOMEM;
528		goto bail;
529	}
530	dc->next = dd->diag_client;
531	dd->diag_client = dc;
532	fp->private_data = dc;
533	ret = 0;
534bail:
535	mutex_unlock(&qib_mutex);
536
537	return ret;
538}
539
540/**
541 * qib_diagpkt_write - write an IB packet
542 * @fp: the diag data device file pointer
543 * @data: qib_diag_pkt structure saying where to get the packet
544 * @count: size of data to write
545 * @off: unused by this code
546 */
547static ssize_t qib_diagpkt_write(struct file *fp,
548				 const char __user *data,
549				 size_t count, loff_t *off)
550{
551	u32 __iomem *piobuf;
552	u32 plen, pbufn, maxlen_reserve;
553	struct qib_diag_xpkt dp;
554	u32 *tmpbuf = NULL;
555	struct qib_devdata *dd;
556	struct qib_pportdata *ppd;
557	ssize_t ret = 0;
558
559	if (count != sizeof(dp)) {
560		ret = -EINVAL;
561		goto bail;
562	}
563	if (copy_from_user(&dp, data, sizeof(dp))) {
564		ret = -EFAULT;
565		goto bail;
566	}
567
568	dd = qib_lookup(dp.unit);
569	if (!dd || !(dd->flags & QIB_PRESENT) || !dd->kregbase) {
570		ret = -ENODEV;
571		goto bail;
572	}
573	if (!(dd->flags & QIB_INITTED)) {
574		/* no hardware, freeze, etc. */
575		ret = -ENODEV;
576		goto bail;
577	}
578
579	if (dp.version != _DIAG_XPKT_VERS) {
580		qib_dev_err(dd, "Invalid version %u for diagpkt_write\n",
581			    dp.version);
582		ret = -EINVAL;
583		goto bail;
584	}
585	/* send count must be an exact number of dwords */
586	if (dp.len & 3) {
587		ret = -EINVAL;
588		goto bail;
589	}
590	if (!dp.port || dp.port > dd->num_pports) {
591		ret = -EINVAL;
592		goto bail;
593	}
594	ppd = &dd->pport[dp.port - 1];
595
596	/*
597	 * need total length before first word written, plus 2 Dwords. One Dword
598	 * is for padding so we get the full user data when not aligned on
599	 * a word boundary. The other Dword is to make sure we have room for the
600	 * ICRC which gets tacked on later.
601	 */
602	maxlen_reserve = 2 * sizeof(u32);
603	if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
604		ret = -EINVAL;
605		goto bail;
606	}
607
608	plen = sizeof(u32) + dp.len;
609
610	tmpbuf = vmalloc(plen);
611	if (!tmpbuf) {
612		qib_devinfo(dd->pcidev,
613			"Unable to allocate tmp buffer, failing\n");
614		ret = -ENOMEM;
615		goto bail;
616	}
617
618	if (copy_from_user(tmpbuf,
619			   (const void __user *) (unsigned long) dp.data,
620			   dp.len)) {
621		ret = -EFAULT;
622		goto bail;
623	}
624
625	plen >>= 2;             /* in dwords */
626
627	if (dp.pbc_wd == 0)
628		dp.pbc_wd = plen;
629
630	piobuf = dd->f_getsendbuf(ppd, dp.pbc_wd, &pbufn);
631	if (!piobuf) {
632		ret = -EBUSY;
633		goto bail;
634	}
635	/* disarm it just to be extra sure */
636	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbufn));
637
638	/* disable header check on pbufn for this packet */
639	dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_DIS1, NULL);
640
641	writeq(dp.pbc_wd, piobuf);
642	/*
643	 * Copy all but the trigger word, then flush, so it's written
644	 * to chip before trigger word, then write trigger word, then
645	 * flush again, so packet is sent.
646	 */
647	if (dd->flags & QIB_PIO_FLUSH_WC) {
648		qib_flush_wc();
649		qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
650		qib_flush_wc();
651		__raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
652	} else
653		qib_pio_copy(piobuf + 2, tmpbuf, plen);
654
655	if (dd->flags & QIB_USE_SPCL_TRIG) {
656		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
657
658		qib_flush_wc();
659		__raw_writel(0xaebecede, piobuf + spcl_off);
660	}
661
662	/*
663	 * Ensure buffer is written to the chip, then re-enable
664	 * header checks (if supported by chip).  The txchk
665	 * code will ensure seen by chip before returning.
666	 */
667	qib_flush_wc();
668	qib_sendbuf_done(dd, pbufn);
669	dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
670
671	ret = sizeof(dp);
672
673bail:
674	vfree(tmpbuf);
675	return ret;
676}
677
678static int qib_diag_release(struct inode *in, struct file *fp)
679{
680	mutex_lock(&qib_mutex);
681	return_client(fp->private_data);
682	fp->private_data = NULL;
683	mutex_unlock(&qib_mutex);
684	return 0;
685}
686
/*
 * Chip-specific code calls to register its interest in
 * a specific range.
 *
 * One element of the per-device singly linked list headed by
 * dd->diag_observer_list.
 */
struct diag_observer_list_elt {
	/* Next element, or NULL at the end of the list */
	struct diag_observer_list_elt *next;
	/* Observer passed to qib_register_observer(); the pointer is stored, not copied */
	const struct diag_observer *op;
};
695
696int qib_register_observer(struct qib_devdata *dd,
697			  const struct diag_observer *op)
698{
699	struct diag_observer_list_elt *olp;
700	unsigned long flags;
701
702	if (!dd || !op)
703		return -EINVAL;
704	olp = vmalloc(sizeof(*olp));
705	if (!olp) {
706		pr_err("vmalloc for observer failed\n");
707		return -ENOMEM;
708	}
709
710	spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
711	olp->op = op;
712	olp->next = dd->diag_observer_list;
713	dd->diag_observer_list = olp;
714	spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
715
716	return 0;
717}
718
719/* Remove all registered observers when device is closed */
720static void qib_unregister_observers(struct qib_devdata *dd)
721{
722	struct diag_observer_list_elt *olp;
723	unsigned long flags;
724
725	spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
726	olp = dd->diag_observer_list;
727	while (olp) {
728		/* Pop one observer, let go of lock */
729		dd->diag_observer_list = olp->next;
730		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
731		vfree(olp);
732		/* try again. */
733		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
734		olp = dd->diag_observer_list;
735	}
736	spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
737}
738
739/*
740 * Find the observer, if any, for the specified address. Initial implementation
741 * is simple stack of observers. This must be called with diag transaction
742 * lock held.
743 */
744static const struct diag_observer *diag_get_observer(struct qib_devdata *dd,
745						     u32 addr)
746{
747	struct diag_observer_list_elt *olp;
748	const struct diag_observer *op = NULL;
749
750	olp = dd->diag_observer_list;
751	while (olp) {
752		op = olp->op;
753		if (addr >= op->bottom && addr <= op->top)
754			break;
755		olp = olp->next;
756	}
757	if (!olp)
758		op = NULL;
759
760	return op;
761}
762
763static ssize_t qib_diag_read(struct file *fp, char __user *data,
764			     size_t count, loff_t *off)
765{
766	struct qib_diag_client *dc = fp->private_data;
767	struct qib_devdata *dd = dc->dd;
768	void __iomem *kreg_base;
769	ssize_t ret;
770
771	if (dc->pid != current->pid) {
772		ret = -EPERM;
773		goto bail;
774	}
775
776	kreg_base = dd->kregbase;
777
778	if (count == 0)
779		ret = 0;
780	else if ((count % 4) || (*off % 4))
781		/* address or length is not 32-bit aligned, hence invalid */
782		ret = -EINVAL;
783	else if (dc->state < READY && (*off || count != 8))
784		ret = -EINVAL;  /* prevent cat /dev/qib_diag* */
785	else {
786		unsigned long flags;
787		u64 data64 = 0;
788		int use_32;
789		const struct diag_observer *op;
790
791		use_32 = (count % 8) || (*off % 8);
792		ret = -1;
793		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
794		/*
795		 * Check for observer on this address range.
796		 * we only support a single 32 or 64-bit read
797		 * via observer, currently.
798		 */
799		op = diag_get_observer(dd, *off);
800		if (op) {
801			u32 offset = *off;
802
803			ret = op->hook(dd, op, offset, &data64, 0, use_32);
804		}
805		/*
806		 * We need to release lock before any copy_to_user(),
807		 * whether implicit in qib_read_umem* or explicit below.
808		 */
809		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
810		if (!op) {
811			if (use_32)
812				/*
813				 * Address or length is not 64-bit aligned;
814				 * do 32-bit rd
815				 */
816				ret = qib_read_umem32(dd, data, (u32) *off,
817						      count);
818			else
819				ret = qib_read_umem64(dd, data, (u32) *off,
820						      count);
821		} else if (ret == count) {
822			/* Below finishes case where observer existed */
823			ret = copy_to_user(data, &data64, use_32 ?
824					   sizeof(u32) : sizeof(u64));
825			if (ret)
826				ret = -EFAULT;
827		}
828	}
829
830	if (ret >= 0) {
831		*off += count;
832		ret = count;
833		if (dc->state == OPENED)
834			dc->state = INIT;
835	}
836bail:
837	return ret;
838}
839
840static ssize_t qib_diag_write(struct file *fp, const char __user *data,
841			      size_t count, loff_t *off)
842{
843	struct qib_diag_client *dc = fp->private_data;
844	struct qib_devdata *dd = dc->dd;
845	void __iomem *kreg_base;
846	ssize_t ret;
847
848	if (dc->pid != current->pid) {
849		ret = -EPERM;
850		goto bail;
851	}
852
853	kreg_base = dd->kregbase;
854
855	if (count == 0)
856		ret = 0;
857	else if ((count % 4) || (*off % 4))
858		/* address or length is not 32-bit aligned, hence invalid */
859		ret = -EINVAL;
860	else if (dc->state < READY &&
861		((*off || count != 8) || dc->state != INIT))
862		/* No writes except second-step of init seq */
863		ret = -EINVAL;  /* before any other write allowed */
864	else {
865		unsigned long flags;
866		const struct diag_observer *op = NULL;
867		int use_32 =  (count % 8) || (*off % 8);
868
869		/*
870		 * Check for observer on this address range.
871		 * We only support a single 32 or 64-bit write
872		 * via observer, currently. This helps, because
873		 * we would otherwise have to jump through hoops
874		 * to make "diag transaction" meaningful when we
875		 * cannot do a copy_from_user while holding the lock.
876		 */
877		if (count == 4 || count == 8) {
878			u64 data64;
879			u32 offset = *off;
880
881			ret = copy_from_user(&data64, data, count);
882			if (ret) {
883				ret = -EFAULT;
884				goto bail;
885			}
886			spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
887			op = diag_get_observer(dd, *off);
888			if (op)
889				ret = op->hook(dd, op, offset, &data64, ~0Ull,
890					       use_32);
891			spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
892		}
893
894		if (!op) {
895			if (use_32)
896				/*
897				 * Address or length is not 64-bit aligned;
898				 * do 32-bit write
899				 */
900				ret = qib_write_umem32(dd, (u32) *off, data,
901						       count);
902			else
903				ret = qib_write_umem64(dd, (u32) *off, data,
904						       count);
905		}
906	}
907
908	if (ret >= 0) {
909		*off += count;
910		ret = count;
911		if (dc->state == INIT)
912			dc->state = READY; /* all read/write OK now */
913	}
914bail:
915	return ret;
916}
917