1/*
2 * TI EDMA DMA engine driver
3 *
4 * Copyright 2012 Texas Instruments
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation version 2.
9 *
10 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
11 * kind, whether express or implied; without even the implied warranty
12 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 */
15
16#include <linux/dmaengine.h>
17#include <linux/dma-mapping.h>
18#include <linux/edma.h>
19#include <linux/err.h>
20#include <linux/init.h>
21#include <linux/interrupt.h>
22#include <linux/list.h>
23#include <linux/module.h>
24#include <linux/platform_device.h>
25#include <linux/slab.h>
26#include <linux/spinlock.h>
27#include <linux/of.h>
28
29#include <linux/platform_data/edma.h>
30
31#include "dmaengine.h"
32#include "virt-dma.h"
33
34/*
35 * This will go away when the private EDMA API is folded
36 * into this driver and the platform device(s) are
37 * instantiated in the arch code. We can only get away
38 * with this simplification because DA8XX may not be built
39 * in the same kernel image with other DaVinci parts. This
40 * avoids having to sprinkle dmaengine driver platform devices
41 * and data throughout all the existing board files.
42 */
43#ifdef CONFIG_ARCH_DAVINCI_DA8XX
44#define EDMA_CTLRS	2
45#define EDMA_CHANS	32
46#else
47#define EDMA_CTLRS	1
48#define EDMA_CHANS	64
49#endif /* CONFIG_ARCH_DAVINCI_DA8XX */
50
/*
 * Max of 20 segments per channel to conserve PaRAM slots.
 * Also note that MAX_NR_SG should be at least the number of periods
 * that are required for ASoC, otherwise DMA prep calls will
 * fail. Today davinci-pcm is the only user of this driver and
 * requires at least 17 slots, so we set up the default to 20.
 */
58#define MAX_NR_SG		20
59#define EDMA_MAX_SLOTS		MAX_NR_SG
60#define EDMA_DESCRIPTORS	16
61
62struct edma_pset {
63	u32				len;
64	dma_addr_t			addr;
65	struct edmacc_param		param;
66};
67
68struct edma_desc {
69	struct virt_dma_desc		vdesc;
70	struct list_head		node;
71	enum dma_transfer_direction	direction;
72	int				cyclic;
73	int				absync;
74	int				pset_nr;
75	struct edma_chan		*echan;
76	int				processed;
77
78	/*
79	 * The following 4 elements are used for residue accounting.
80	 *
81	 * - processed_stat: the number of SG elements we have traversed
82	 * so far to cover accounting. This is updated directly to processed
83	 * during edma_callback and is always <= processed, because processed
84	 * refers to the number of pending transfer (programmed to EDMA
85	 * controller), where as processed_stat tracks number of transfers
86	 * accounted for so far.
87	 *
88	 * - residue: The amount of bytes we have left to transfer for this desc
89	 *
90	 * - residue_stat: The residue in bytes of data we have covered
91	 * so far for accounting. This is updated directly to residue
92	 * during callbacks to keep it current.
93	 *
94	 * - sg_len: Tracks the length of the current intermediate transfer,
95	 * this is required to update the residue during intermediate transfer
96	 * completion callback.
97	 */
98	int				processed_stat;
99	u32				sg_len;
100	u32				residue;
101	u32				residue_stat;
102
103	struct edma_pset		pset[0];
104};
105
106struct edma_cc;
107
108struct edma_chan {
109	struct virt_dma_chan		vchan;
110	struct list_head		node;
111	struct edma_desc		*edesc;
112	struct edma_cc			*ecc;
113	int				ch_num;
114	bool				alloced;
115	int				slot[EDMA_MAX_SLOTS];
116	int				missed;
117	struct dma_slave_config		cfg;
118};
119
120struct edma_cc {
121	int				ctlr;
122	struct dma_device		dma_slave;
123	struct edma_chan		slave_chans[EDMA_CHANS];
124	int				num_slave_chans;
125	int				dummy_slot;
126};
127
128static inline struct edma_cc *to_edma_cc(struct dma_device *d)
129{
130	return container_of(d, struct edma_cc, dma_slave);
131}
132
133static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
134{
135	return container_of(c, struct edma_chan, vchan.chan);
136}
137
138static inline struct edma_desc
139*to_edma_desc(struct dma_async_tx_descriptor *tx)
140{
141	return container_of(tx, struct edma_desc, vdesc.tx);
142}
143
144static void edma_desc_free(struct virt_dma_desc *vdesc)
145{
146	kfree(container_of(vdesc, struct edma_desc, vdesc));
147}
148
149/* Dispatch a queued descriptor to the controller (caller holds lock) */
150static void edma_execute(struct edma_chan *echan)
151{
152	struct virt_dma_desc *vdesc;
153	struct edma_desc *edesc;
154	struct device *dev = echan->vchan.chan.device->dev;
155	int i, j, left, nslots;
156
157	/* If either we processed all psets or we're still not started */
158	if (!echan->edesc ||
159	    echan->edesc->pset_nr == echan->edesc->processed) {
160		/* Get next vdesc */
161		vdesc = vchan_next_desc(&echan->vchan);
162		if (!vdesc) {
163			echan->edesc = NULL;
164			return;
165		}
166		list_del(&vdesc->node);
167		echan->edesc = to_edma_desc(&vdesc->tx);
168	}
169
170	edesc = echan->edesc;
171
172	/* Find out how many left */
173	left = edesc->pset_nr - edesc->processed;
174	nslots = min(MAX_NR_SG, left);
175	edesc->sg_len = 0;
176
177	/* Write descriptor PaRAM set(s) */
178	for (i = 0; i < nslots; i++) {
179		j = i + edesc->processed;
180		edma_write_slot(echan->slot[i], &edesc->pset[j].param);
181		edesc->sg_len += edesc->pset[j].len;
182		dev_vdbg(echan->vchan.chan.device->dev,
183			"\n pset[%d]:\n"
184			"  chnum\t%d\n"
185			"  slot\t%d\n"
186			"  opt\t%08x\n"
187			"  src\t%08x\n"
188			"  dst\t%08x\n"
189			"  abcnt\t%08x\n"
190			"  ccnt\t%08x\n"
191			"  bidx\t%08x\n"
192			"  cidx\t%08x\n"
193			"  lkrld\t%08x\n",
194			j, echan->ch_num, echan->slot[i],
195			edesc->pset[j].param.opt,
196			edesc->pset[j].param.src,
197			edesc->pset[j].param.dst,
198			edesc->pset[j].param.a_b_cnt,
199			edesc->pset[j].param.ccnt,
200			edesc->pset[j].param.src_dst_bidx,
201			edesc->pset[j].param.src_dst_cidx,
202			edesc->pset[j].param.link_bcntrld);
203		/* Link to the previous slot if not the last set */
204		if (i != (nslots - 1))
205			edma_link(echan->slot[i], echan->slot[i+1]);
206	}
207
208	edesc->processed += nslots;
209
210	/*
211	 * If this is either the last set in a set of SG-list transactions
212	 * then setup a link to the dummy slot, this results in all future
213	 * events being absorbed and that's OK because we're done
214	 */
215	if (edesc->processed == edesc->pset_nr) {
216		if (edesc->cyclic)
217			edma_link(echan->slot[nslots-1], echan->slot[1]);
218		else
219			edma_link(echan->slot[nslots-1],
220				  echan->ecc->dummy_slot);
221	}
222
223	if (edesc->processed <= MAX_NR_SG) {
224		dev_dbg(dev, "first transfer starting on channel %d\n",
225			echan->ch_num);
226		edma_start(echan->ch_num);
227	} else {
228		dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
229			echan->ch_num, edesc->processed);
230		edma_resume(echan->ch_num);
231	}
232
233	/*
234	 * This happens due to setup times between intermediate transfers
235	 * in long SG lists which have to be broken up into transfers of
236	 * MAX_NR_SG
237	 */
238	if (echan->missed) {
239		dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
240		edma_clean_channel(echan->ch_num);
241		edma_stop(echan->ch_num);
242		edma_start(echan->ch_num);
243		edma_trigger_channel(echan->ch_num);
244		echan->missed = 0;
245	}
246}
247
248static int edma_terminate_all(struct dma_chan *chan)
249{
250	struct edma_chan *echan = to_edma_chan(chan);
251	unsigned long flags;
252	LIST_HEAD(head);
253
254	spin_lock_irqsave(&echan->vchan.lock, flags);
255
256	/*
257	 * Stop DMA activity: we assume the callback will not be called
258	 * after edma_dma() returns (even if it does, it will see
259	 * echan->edesc is NULL and exit.)
260	 */
261	if (echan->edesc) {
262		int cyclic = echan->edesc->cyclic;
263
264		/*
265		 * free the running request descriptor
266		 * since it is not in any of the vdesc lists
267		 */
268		edma_desc_free(&echan->edesc->vdesc);
269
270		echan->edesc = NULL;
271		edma_stop(echan->ch_num);
272		/* Move the cyclic channel back to default queue */
273		if (cyclic)
274			edma_assign_channel_eventq(echan->ch_num,
275						   EVENTQ_DEFAULT);
276	}
277
278	vchan_get_all_descriptors(&echan->vchan, &head);
279	spin_unlock_irqrestore(&echan->vchan.lock, flags);
280	vchan_dma_desc_free_list(&echan->vchan, &head);
281
282	return 0;
283}
284
285static int edma_slave_config(struct dma_chan *chan,
286	struct dma_slave_config *cfg)
287{
288	struct edma_chan *echan = to_edma_chan(chan);
289
290	if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
291	    cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
292		return -EINVAL;
293
294	memcpy(&echan->cfg, cfg, sizeof(echan->cfg));
295
296	return 0;
297}
298
299static int edma_dma_pause(struct dma_chan *chan)
300{
301	struct edma_chan *echan = to_edma_chan(chan);
302
303	/* Pause/Resume only allowed with cyclic mode */
304	if (!echan->edesc || !echan->edesc->cyclic)
305		return -EINVAL;
306
307	edma_pause(echan->ch_num);
308	return 0;
309}
310
311static int edma_dma_resume(struct dma_chan *chan)
312{
313	struct edma_chan *echan = to_edma_chan(chan);
314
315	/* Pause/Resume only allowed with cyclic mode */
316	if (!echan->edesc->cyclic)
317		return -EINVAL;
318
319	edma_resume(echan->ch_num);
320	return 0;
321}
322
323/*
324 * A PaRAM set configuration abstraction used by other modes
325 * @chan: Channel who's PaRAM set we're configuring
326 * @pset: PaRAM set to initialize and setup.
327 * @src_addr: Source address of the DMA
328 * @dst_addr: Destination address of the DMA
329 * @burst: In units of dev_width, how much to send
330 * @dev_width: How much is the dev_width
331 * @dma_length: Total length of the DMA transfer
332 * @direction: Direction of the transfer
333 */
334static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
335	dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
336	enum dma_slave_buswidth dev_width, unsigned int dma_length,
337	enum dma_transfer_direction direction)
338{
339	struct edma_chan *echan = to_edma_chan(chan);
340	struct device *dev = chan->device->dev;
341	struct edmacc_param *param = &epset->param;
342	int acnt, bcnt, ccnt, cidx;
343	int src_bidx, dst_bidx, src_cidx, dst_cidx;
344	int absync;
345
346	acnt = dev_width;
347
348	/* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */
349	if (!burst)
350		burst = 1;
351	/*
352	 * If the maxburst is equal to the fifo width, use
353	 * A-synced transfers. This allows for large contiguous
354	 * buffer transfers using only one PaRAM set.
355	 */
356	if (burst == 1) {
357		/*
358		 * For the A-sync case, bcnt and ccnt are the remainder
359		 * and quotient respectively of the division of:
360		 * (dma_length / acnt) by (SZ_64K -1). This is so
361		 * that in case bcnt over flows, we have ccnt to use.
362		 * Note: In A-sync tranfer only, bcntrld is used, but it
363		 * only applies for sg_dma_len(sg) >= SZ_64K.
364		 * In this case, the best way adopted is- bccnt for the
365		 * first frame will be the remainder below. Then for
366		 * every successive frame, bcnt will be SZ_64K-1. This
367		 * is assured as bcntrld = 0xffff in end of function.
368		 */
369		absync = false;
370		ccnt = dma_length / acnt / (SZ_64K - 1);
371		bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
372		/*
373		 * If bcnt is non-zero, we have a remainder and hence an
374		 * extra frame to transfer, so increment ccnt.
375		 */
376		if (bcnt)
377			ccnt++;
378		else
379			bcnt = SZ_64K - 1;
380		cidx = acnt;
381	} else {
382		/*
383		 * If maxburst is greater than the fifo address_width,
384		 * use AB-synced transfers where A count is the fifo
385		 * address_width and B count is the maxburst. In this
386		 * case, we are limited to transfers of C count frames
387		 * of (address_width * maxburst) where C count is limited
388		 * to SZ_64K-1. This places an upper bound on the length
389		 * of an SG segment that can be handled.
390		 */
391		absync = true;
392		bcnt = burst;
393		ccnt = dma_length / (acnt * bcnt);
394		if (ccnt > (SZ_64K - 1)) {
395			dev_err(dev, "Exceeded max SG segment size\n");
396			return -EINVAL;
397		}
398		cidx = acnt * bcnt;
399	}
400
401	epset->len = dma_length;
402
403	if (direction == DMA_MEM_TO_DEV) {
404		src_bidx = acnt;
405		src_cidx = cidx;
406		dst_bidx = 0;
407		dst_cidx = 0;
408		epset->addr = src_addr;
409	} else if (direction == DMA_DEV_TO_MEM)  {
410		src_bidx = 0;
411		src_cidx = 0;
412		dst_bidx = acnt;
413		dst_cidx = cidx;
414		epset->addr = dst_addr;
415	} else if (direction == DMA_MEM_TO_MEM)  {
416		src_bidx = acnt;
417		src_cidx = cidx;
418		dst_bidx = acnt;
419		dst_cidx = cidx;
420	} else {
421		dev_err(dev, "%s: direction not implemented yet\n", __func__);
422		return -EINVAL;
423	}
424
425	param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
426	/* Configure A or AB synchronized transfers */
427	if (absync)
428		param->opt |= SYNCDIM;
429
430	param->src = src_addr;
431	param->dst = dst_addr;
432
433	param->src_dst_bidx = (dst_bidx << 16) | src_bidx;
434	param->src_dst_cidx = (dst_cidx << 16) | src_cidx;
435
436	param->a_b_cnt = bcnt << 16 | acnt;
437	param->ccnt = ccnt;
438	/*
439	 * Only time when (bcntrld) auto reload is required is for
440	 * A-sync case, and in this case, a requirement of reload value
441	 * of SZ_64K-1 only is assured. 'link' is initially set to NULL
442	 * and then later will be populated by edma_execute.
443	 */
444	param->link_bcntrld = 0xffffffff;
445	return absync;
446}
447
448static struct dma_async_tx_descriptor *edma_prep_slave_sg(
449	struct dma_chan *chan, struct scatterlist *sgl,
450	unsigned int sg_len, enum dma_transfer_direction direction,
451	unsigned long tx_flags, void *context)
452{
453	struct edma_chan *echan = to_edma_chan(chan);
454	struct device *dev = chan->device->dev;
455	struct edma_desc *edesc;
456	dma_addr_t src_addr = 0, dst_addr = 0;
457	enum dma_slave_buswidth dev_width;
458	u32 burst;
459	struct scatterlist *sg;
460	int i, nslots, ret;
461
462	if (unlikely(!echan || !sgl || !sg_len))
463		return NULL;
464
465	if (direction == DMA_DEV_TO_MEM) {
466		src_addr = echan->cfg.src_addr;
467		dev_width = echan->cfg.src_addr_width;
468		burst = echan->cfg.src_maxburst;
469	} else if (direction == DMA_MEM_TO_DEV) {
470		dst_addr = echan->cfg.dst_addr;
471		dev_width = echan->cfg.dst_addr_width;
472		burst = echan->cfg.dst_maxburst;
473	} else {
474		dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
475		return NULL;
476	}
477
478	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
479		dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
480		return NULL;
481	}
482
483	edesc = kzalloc(sizeof(*edesc) + sg_len *
484		sizeof(edesc->pset[0]), GFP_ATOMIC);
485	if (!edesc) {
486		dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__);
487		return NULL;
488	}
489
490	edesc->pset_nr = sg_len;
491	edesc->residue = 0;
492	edesc->direction = direction;
493	edesc->echan = echan;
494
495	/* Allocate a PaRAM slot, if needed */
496	nslots = min_t(unsigned, MAX_NR_SG, sg_len);
497
498	for (i = 0; i < nslots; i++) {
499		if (echan->slot[i] < 0) {
500			echan->slot[i] =
501				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
502						EDMA_SLOT_ANY);
503			if (echan->slot[i] < 0) {
504				kfree(edesc);
505				dev_err(dev, "%s: Failed to allocate slot\n",
506					__func__);
507				return NULL;
508			}
509		}
510	}
511
512	/* Configure PaRAM sets for each SG */
513	for_each_sg(sgl, sg, sg_len, i) {
514		/* Get address for each SG */
515		if (direction == DMA_DEV_TO_MEM)
516			dst_addr = sg_dma_address(sg);
517		else
518			src_addr = sg_dma_address(sg);
519
520		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
521				       dst_addr, burst, dev_width,
522				       sg_dma_len(sg), direction);
523		if (ret < 0) {
524			kfree(edesc);
525			return NULL;
526		}
527
528		edesc->absync = ret;
529		edesc->residue += sg_dma_len(sg);
530
531		/* If this is the last in a current SG set of transactions,
532		   enable interrupts so that next set is processed */
533		if (!((i+1) % MAX_NR_SG))
534			edesc->pset[i].param.opt |= TCINTEN;
535
536		/* If this is the last set, enable completion interrupt flag */
537		if (i == sg_len - 1)
538			edesc->pset[i].param.opt |= TCINTEN;
539	}
540	edesc->residue_stat = edesc->residue;
541
542	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
543}
544
545static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
546	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
547	size_t len, unsigned long tx_flags)
548{
549	int ret;
550	struct edma_desc *edesc;
551	struct device *dev = chan->device->dev;
552	struct edma_chan *echan = to_edma_chan(chan);
553
554	if (unlikely(!echan || !len))
555		return NULL;
556
557	edesc = kzalloc(sizeof(*edesc) + sizeof(edesc->pset[0]), GFP_ATOMIC);
558	if (!edesc) {
559		dev_dbg(dev, "Failed to allocate a descriptor\n");
560		return NULL;
561	}
562
563	edesc->pset_nr = 1;
564
565	ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1,
566			       DMA_SLAVE_BUSWIDTH_4_BYTES, len, DMA_MEM_TO_MEM);
567	if (ret < 0)
568		return NULL;
569
570	edesc->absync = ret;
571
572	/*
573	 * Enable intermediate transfer chaining to re-trigger channel
574	 * on completion of every TR, and enable transfer-completion
575	 * interrupt on completion of the whole transfer.
576	 */
577	edesc->pset[0].param.opt |= ITCCHEN;
578	edesc->pset[0].param.opt |= TCINTEN;
579
580	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
581}
582
583static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
584	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
585	size_t period_len, enum dma_transfer_direction direction,
586	unsigned long tx_flags)
587{
588	struct edma_chan *echan = to_edma_chan(chan);
589	struct device *dev = chan->device->dev;
590	struct edma_desc *edesc;
591	dma_addr_t src_addr, dst_addr;
592	enum dma_slave_buswidth dev_width;
593	u32 burst;
594	int i, ret, nslots;
595
596	if (unlikely(!echan || !buf_len || !period_len))
597		return NULL;
598
599	if (direction == DMA_DEV_TO_MEM) {
600		src_addr = echan->cfg.src_addr;
601		dst_addr = buf_addr;
602		dev_width = echan->cfg.src_addr_width;
603		burst = echan->cfg.src_maxburst;
604	} else if (direction == DMA_MEM_TO_DEV) {
605		src_addr = buf_addr;
606		dst_addr = echan->cfg.dst_addr;
607		dev_width = echan->cfg.dst_addr_width;
608		burst = echan->cfg.dst_maxburst;
609	} else {
610		dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
611		return NULL;
612	}
613
614	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
615		dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
616		return NULL;
617	}
618
619	if (unlikely(buf_len % period_len)) {
620		dev_err(dev, "Period should be multiple of Buffer length\n");
621		return NULL;
622	}
623
624	nslots = (buf_len / period_len) + 1;
625
626	/*
627	 * Cyclic DMA users such as audio cannot tolerate delays introduced
628	 * by cases where the number of periods is more than the maximum
629	 * number of SGs the EDMA driver can handle at a time. For DMA types
630	 * such as Slave SGs, such delays are tolerable and synchronized,
631	 * but the synchronization is difficult to achieve with Cyclic and
632	 * cannot be guaranteed, so we error out early.
633	 */
634	if (nslots > MAX_NR_SG)
635		return NULL;
636
637	edesc = kzalloc(sizeof(*edesc) + nslots *
638		sizeof(edesc->pset[0]), GFP_ATOMIC);
639	if (!edesc) {
640		dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__);
641		return NULL;
642	}
643
644	edesc->cyclic = 1;
645	edesc->pset_nr = nslots;
646	edesc->residue = edesc->residue_stat = buf_len;
647	edesc->direction = direction;
648	edesc->echan = echan;
649
650	dev_dbg(dev, "%s: channel=%d nslots=%d period_len=%zu buf_len=%zu\n",
651		__func__, echan->ch_num, nslots, period_len, buf_len);
652
653	for (i = 0; i < nslots; i++) {
654		/* Allocate a PaRAM slot, if needed */
655		if (echan->slot[i] < 0) {
656			echan->slot[i] =
657				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
658						EDMA_SLOT_ANY);
659			if (echan->slot[i] < 0) {
660				kfree(edesc);
661				dev_err(dev, "%s: Failed to allocate slot\n",
662					__func__);
663				return NULL;
664			}
665		}
666
667		if (i == nslots - 1) {
668			memcpy(&edesc->pset[i], &edesc->pset[0],
669			       sizeof(edesc->pset[0]));
670			break;
671		}
672
673		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
674				       dst_addr, burst, dev_width, period_len,
675				       direction);
676		if (ret < 0) {
677			kfree(edesc);
678			return NULL;
679		}
680
681		if (direction == DMA_DEV_TO_MEM)
682			dst_addr += period_len;
683		else
684			src_addr += period_len;
685
686		dev_vdbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
687		dev_vdbg(dev,
688			"\n pset[%d]:\n"
689			"  chnum\t%d\n"
690			"  slot\t%d\n"
691			"  opt\t%08x\n"
692			"  src\t%08x\n"
693			"  dst\t%08x\n"
694			"  abcnt\t%08x\n"
695			"  ccnt\t%08x\n"
696			"  bidx\t%08x\n"
697			"  cidx\t%08x\n"
698			"  lkrld\t%08x\n",
699			i, echan->ch_num, echan->slot[i],
700			edesc->pset[i].param.opt,
701			edesc->pset[i].param.src,
702			edesc->pset[i].param.dst,
703			edesc->pset[i].param.a_b_cnt,
704			edesc->pset[i].param.ccnt,
705			edesc->pset[i].param.src_dst_bidx,
706			edesc->pset[i].param.src_dst_cidx,
707			edesc->pset[i].param.link_bcntrld);
708
709		edesc->absync = ret;
710
711		/*
712		 * Enable period interrupt only if it is requested
713		 */
714		if (tx_flags & DMA_PREP_INTERRUPT)
715			edesc->pset[i].param.opt |= TCINTEN;
716	}
717
718	/* Place the cyclic channel to highest priority queue */
719	edma_assign_channel_eventq(echan->ch_num, EVENTQ_0);
720
721	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
722}
723
724static void edma_callback(unsigned ch_num, u16 ch_status, void *data)
725{
726	struct edma_chan *echan = data;
727	struct device *dev = echan->vchan.chan.device->dev;
728	struct edma_desc *edesc;
729	struct edmacc_param p;
730
731	edesc = echan->edesc;
732
733	/* Pause the channel for non-cyclic */
734	if (!edesc || (edesc && !edesc->cyclic))
735		edma_pause(echan->ch_num);
736
737	switch (ch_status) {
738	case EDMA_DMA_COMPLETE:
739		spin_lock(&echan->vchan.lock);
740
741		if (edesc) {
742			if (edesc->cyclic) {
743				vchan_cyclic_callback(&edesc->vdesc);
744			} else if (edesc->processed == edesc->pset_nr) {
745				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
746				edesc->residue = 0;
747				edma_stop(echan->ch_num);
748				vchan_cookie_complete(&edesc->vdesc);
749				edma_execute(echan);
750			} else {
751				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
752
753				/* Update statistics for tx_status */
754				edesc->residue -= edesc->sg_len;
755				edesc->residue_stat = edesc->residue;
756				edesc->processed_stat = edesc->processed;
757
758				edma_execute(echan);
759			}
760		}
761
762		spin_unlock(&echan->vchan.lock);
763
764		break;
765	case EDMA_DMA_CC_ERROR:
766		spin_lock(&echan->vchan.lock);
767
768		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
769
770		/*
771		 * Issue later based on missed flag which will be sure
772		 * to happen as:
773		 * (1) we finished transmitting an intermediate slot and
774		 *     edma_execute is coming up.
775		 * (2) or we finished current transfer and issue will
776		 *     call edma_execute.
777		 *
778		 * Important note: issuing can be dangerous here and
779		 * lead to some nasty recursion when we are in a NULL
780		 * slot. So we avoid doing so and set the missed flag.
781		 */
782		if (p.a_b_cnt == 0 && p.ccnt == 0) {
783			dev_dbg(dev, "Error occurred, looks like slot is null, just setting miss\n");
784			echan->missed = 1;
785		} else {
786			/*
787			 * The slot is already programmed but the event got
788			 * missed, so its safe to issue it here.
789			 */
790			dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n");
791			edma_clean_channel(echan->ch_num);
792			edma_stop(echan->ch_num);
793			edma_start(echan->ch_num);
794			edma_trigger_channel(echan->ch_num);
795		}
796
797		spin_unlock(&echan->vchan.lock);
798
799		break;
800	default:
801		break;
802	}
803}
804
805/* Alloc channel resources */
806static int edma_alloc_chan_resources(struct dma_chan *chan)
807{
808	struct edma_chan *echan = to_edma_chan(chan);
809	struct device *dev = chan->device->dev;
810	int ret;
811	int a_ch_num;
812	LIST_HEAD(descs);
813
814	a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback,
815					echan, EVENTQ_DEFAULT);
816
817	if (a_ch_num < 0) {
818		ret = -ENODEV;
819		goto err_no_chan;
820	}
821
822	if (a_ch_num != echan->ch_num) {
823		dev_err(dev, "failed to allocate requested channel %u:%u\n",
824			EDMA_CTLR(echan->ch_num),
825			EDMA_CHAN_SLOT(echan->ch_num));
826		ret = -ENODEV;
827		goto err_wrong_chan;
828	}
829
830	echan->alloced = true;
831	echan->slot[0] = echan->ch_num;
832
833	dev_dbg(dev, "allocated channel %d for %u:%u\n", echan->ch_num,
834		EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num));
835
836	return 0;
837
838err_wrong_chan:
839	edma_free_channel(a_ch_num);
840err_no_chan:
841	return ret;
842}
843
844/* Free channel resources */
845static void edma_free_chan_resources(struct dma_chan *chan)
846{
847	struct edma_chan *echan = to_edma_chan(chan);
848	struct device *dev = chan->device->dev;
849	int i;
850
851	/* Terminate transfers */
852	edma_stop(echan->ch_num);
853
854	vchan_free_chan_resources(&echan->vchan);
855
856	/* Free EDMA PaRAM slots */
857	for (i = 1; i < EDMA_MAX_SLOTS; i++) {
858		if (echan->slot[i] >= 0) {
859			edma_free_slot(echan->slot[i]);
860			echan->slot[i] = -1;
861		}
862	}
863
864	/* Free EDMA channel */
865	if (echan->alloced) {
866		edma_free_channel(echan->ch_num);
867		echan->alloced = false;
868	}
869
870	dev_dbg(dev, "freeing channel for %u\n", echan->ch_num);
871}
872
873/* Send pending descriptor to hardware */
874static void edma_issue_pending(struct dma_chan *chan)
875{
876	struct edma_chan *echan = to_edma_chan(chan);
877	unsigned long flags;
878
879	spin_lock_irqsave(&echan->vchan.lock, flags);
880	if (vchan_issue_pending(&echan->vchan) && !echan->edesc)
881		edma_execute(echan);
882	spin_unlock_irqrestore(&echan->vchan.lock, flags);
883}
884
885static u32 edma_residue(struct edma_desc *edesc)
886{
887	bool dst = edesc->direction == DMA_DEV_TO_MEM;
888	struct edma_pset *pset = edesc->pset;
889	dma_addr_t done, pos;
890	int i;
891
892	/*
893	 * We always read the dst/src position from the first RamPar
894	 * pset. That's the one which is active now.
895	 */
896	pos = edma_get_position(edesc->echan->slot[0], dst);
897
898	/*
899	 * Cyclic is simple. Just subtract pset[0].addr from pos.
900	 *
901	 * We never update edesc->residue in the cyclic case, so we
902	 * can tell the remaining room to the end of the circular
903	 * buffer.
904	 */
905	if (edesc->cyclic) {
906		done = pos - pset->addr;
907		edesc->residue_stat = edesc->residue - done;
908		return edesc->residue_stat;
909	}
910
911	/*
912	 * For SG operation we catch up with the last processed
913	 * status.
914	 */
915	pset += edesc->processed_stat;
916
917	for (i = edesc->processed_stat; i < edesc->processed; i++, pset++) {
918		/*
919		 * If we are inside this pset address range, we know
920		 * this is the active one. Get the current delta and
921		 * stop walking the psets.
922		 */
923		if (pos >= pset->addr && pos < pset->addr + pset->len)
924			return edesc->residue_stat - (pos - pset->addr);
925
926		/* Otherwise mark it done and update residue_stat. */
927		edesc->processed_stat++;
928		edesc->residue_stat -= pset->len;
929	}
930	return edesc->residue_stat;
931}
932
933/* Check request completion status */
934static enum dma_status edma_tx_status(struct dma_chan *chan,
935				      dma_cookie_t cookie,
936				      struct dma_tx_state *txstate)
937{
938	struct edma_chan *echan = to_edma_chan(chan);
939	struct virt_dma_desc *vdesc;
940	enum dma_status ret;
941	unsigned long flags;
942
943	ret = dma_cookie_status(chan, cookie, txstate);
944	if (ret == DMA_COMPLETE || !txstate)
945		return ret;
946
947	spin_lock_irqsave(&echan->vchan.lock, flags);
948	if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie)
949		txstate->residue = edma_residue(echan->edesc);
950	else if ((vdesc = vchan_find_desc(&echan->vchan, cookie)))
951		txstate->residue = to_edma_desc(&vdesc->tx)->residue;
952	spin_unlock_irqrestore(&echan->vchan.lock, flags);
953
954	return ret;
955}
956
957static void __init edma_chan_init(struct edma_cc *ecc,
958				  struct dma_device *dma,
959				  struct edma_chan *echans)
960{
961	int i, j;
962
963	for (i = 0; i < EDMA_CHANS; i++) {
964		struct edma_chan *echan = &echans[i];
965		echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i);
966		echan->ecc = ecc;
967		echan->vchan.desc_free = edma_desc_free;
968
969		vchan_init(&echan->vchan, dma);
970
971		INIT_LIST_HEAD(&echan->node);
972		for (j = 0; j < EDMA_MAX_SLOTS; j++)
973			echan->slot[j] = -1;
974	}
975}
976
977#define EDMA_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
978				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
979				 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
980				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
981
982static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma,
983			  struct device *dev)
984{
985	dma->device_prep_slave_sg = edma_prep_slave_sg;
986	dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
987	dma->device_prep_dma_memcpy = edma_prep_dma_memcpy;
988	dma->device_alloc_chan_resources = edma_alloc_chan_resources;
989	dma->device_free_chan_resources = edma_free_chan_resources;
990	dma->device_issue_pending = edma_issue_pending;
991	dma->device_tx_status = edma_tx_status;
992	dma->device_config = edma_slave_config;
993	dma->device_pause = edma_dma_pause;
994	dma->device_resume = edma_dma_resume;
995	dma->device_terminate_all = edma_terminate_all;
996
997	dma->src_addr_widths = EDMA_DMA_BUSWIDTHS;
998	dma->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
999	dma->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
1000	dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
1001
1002	dma->dev = dev;
1003
1004	/*
1005	 * code using dma memcpy must make sure alignment of
1006	 * length is at dma->copy_align boundary.
1007	 */
1008	dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES;
1009
1010	INIT_LIST_HEAD(&dma->channels);
1011}
1012
1013static int edma_probe(struct platform_device *pdev)
1014{
1015	struct edma_cc *ecc;
1016	int ret;
1017
1018	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
1019	if (ret)
1020		return ret;
1021
1022	ecc = devm_kzalloc(&pdev->dev, sizeof(*ecc), GFP_KERNEL);
1023	if (!ecc) {
1024		dev_err(&pdev->dev, "Can't allocate controller\n");
1025		return -ENOMEM;
1026	}
1027
1028	ecc->ctlr = pdev->id;
1029	ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY);
1030	if (ecc->dummy_slot < 0) {
1031		dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n");
1032		return ecc->dummy_slot;
1033	}
1034
1035	dma_cap_zero(ecc->dma_slave.cap_mask);
1036	dma_cap_set(DMA_SLAVE, ecc->dma_slave.cap_mask);
1037	dma_cap_set(DMA_CYCLIC, ecc->dma_slave.cap_mask);
1038	dma_cap_set(DMA_MEMCPY, ecc->dma_slave.cap_mask);
1039
1040	edma_dma_init(ecc, &ecc->dma_slave, &pdev->dev);
1041
1042	edma_chan_init(ecc, &ecc->dma_slave, ecc->slave_chans);
1043
1044	ret = dma_async_device_register(&ecc->dma_slave);
1045	if (ret)
1046		goto err_reg1;
1047
1048	platform_set_drvdata(pdev, ecc);
1049
1050	dev_info(&pdev->dev, "TI EDMA DMA engine driver\n");
1051
1052	return 0;
1053
1054err_reg1:
1055	edma_free_slot(ecc->dummy_slot);
1056	return ret;
1057}
1058
1059static int edma_remove(struct platform_device *pdev)
1060{
1061	struct device *dev = &pdev->dev;
1062	struct edma_cc *ecc = dev_get_drvdata(dev);
1063
1064	dma_async_device_unregister(&ecc->dma_slave);
1065	edma_free_slot(ecc->dummy_slot);
1066
1067	return 0;
1068}
1069
1070static struct platform_driver edma_driver = {
1071	.probe		= edma_probe,
1072	.remove		= edma_remove,
1073	.driver = {
1074		.name = "edma-dma-engine",
1075	},
1076};
1077
1078bool edma_filter_fn(struct dma_chan *chan, void *param)
1079{
1080	if (chan->device->dev->driver == &edma_driver.driver) {
1081		struct edma_chan *echan = to_edma_chan(chan);
1082		unsigned ch_req = *(unsigned *)param;
1083		return ch_req == echan->ch_num;
1084	}
1085	return false;
1086}
1087EXPORT_SYMBOL(edma_filter_fn);
1088
1089static int edma_init(void)
1090{
1091	return platform_driver_register(&edma_driver);
1092}
1093subsys_initcall(edma_init);
1094
1095static void __exit edma_exit(void)
1096{
1097	platform_driver_unregister(&edma_driver);
1098}
1099module_exit(edma_exit);
1100
1101MODULE_AUTHOR("Matt Porter <matt.porter@linaro.org>");
1102MODULE_DESCRIPTION("TI EDMA DMA engine driver");
1103MODULE_LICENSE("GPL v2");
1104