1/*
2 * offload engine driver for the Marvell XOR engine
3 * Copyright (C) 2007, 2008, Marvell International Ltd.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/init.h>
16#include <linux/module.h>
17#include <linux/slab.h>
18#include <linux/delay.h>
19#include <linux/dma-mapping.h>
20#include <linux/spinlock.h>
21#include <linux/interrupt.h>
22#include <linux/platform_device.h>
23#include <linux/memory.h>
24#include <linux/clk.h>
25#include <linux/of.h>
26#include <linux/of_irq.h>
27#include <linux/irqdomain.h>
28#include <linux/platform_data/dma-mv_xor.h>
29
30#include "dmaengine.h"
31#include "mv_xor.h"
32
/* Forward declaration: needed by mv_xor_start_new_chain() below. */
static void mv_xor_issue_pending(struct dma_chan *chan);

/* Convert a generic dma_chan embedded in a mv_xor_chan back to the
 * driver-private channel structure.
 */
#define to_mv_xor_chan(chan)		\
	container_of(chan, struct mv_xor_chan, dmachan)

/* Convert an async_tx descriptor back to its containing slot. */
#define to_mv_xor_slot(tx)		\
	container_of(tx, struct mv_xor_desc_slot, async_tx)

/* Device pointer for dev_dbg()/dev_err() messages on a channel. */
#define mv_chan_to_devp(chan)           \
	((chan)->dmadev.dev)
43
44static void mv_desc_init(struct mv_xor_desc_slot *desc,
45			 dma_addr_t addr, u32 byte_count,
46			 enum dma_ctrl_flags flags)
47{
48	struct mv_xor_desc *hw_desc = desc->hw_desc;
49
50	hw_desc->status = XOR_DESC_DMA_OWNED;
51	hw_desc->phy_next_desc = 0;
52	/* Enable end-of-descriptor interrupts only for DMA_PREP_INTERRUPT */
53	hw_desc->desc_command = (flags & DMA_PREP_INTERRUPT) ?
54				XOR_DESC_EOD_INT_EN : 0;
55	hw_desc->phy_dest_addr = addr;
56	hw_desc->byte_count = byte_count;
57}
58
59static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
60				  u32 next_desc_addr)
61{
62	struct mv_xor_desc *hw_desc = desc->hw_desc;
63	BUG_ON(hw_desc->phy_next_desc);
64	hw_desc->phy_next_desc = next_desc_addr;
65}
66
67static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
68{
69	struct mv_xor_desc *hw_desc = desc->hw_desc;
70	hw_desc->phy_next_desc = 0;
71}
72
73static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
74				 int index, dma_addr_t addr)
75{
76	struct mv_xor_desc *hw_desc = desc->hw_desc;
77	hw_desc->phy_src_addr[mv_phy_src_idx(index)] = addr;
78	if (desc->type == DMA_XOR)
79		hw_desc->desc_command |= (1 << index);
80}
81
82static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
83{
84	return readl_relaxed(XOR_CURR_DESC(chan));
85}
86
87static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
88					u32 next_desc_addr)
89{
90	writel_relaxed(next_desc_addr, XOR_NEXT_DESC(chan));
91}
92
93static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
94{
95	u32 val = readl_relaxed(XOR_INTR_MASK(chan));
96	val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
97	writel_relaxed(val, XOR_INTR_MASK(chan));
98}
99
100static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
101{
102	u32 intr_cause = readl_relaxed(XOR_INTR_CAUSE(chan));
103	intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
104	return intr_cause;
105}
106
107static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
108{
109	u32 val;
110
111	val = XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | XOR_INT_STOPPED;
112	val = ~(val << (chan->idx * 16));
113	dev_dbg(mv_chan_to_devp(chan), "%s, val 0x%08x\n", __func__, val);
114	writel_relaxed(val, XOR_INTR_CAUSE(chan));
115}
116
117static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
118{
119	u32 val = 0xFFFF0000 >> (chan->idx * 16);
120	writel_relaxed(val, XOR_INTR_CAUSE(chan));
121}
122
123static void mv_set_mode(struct mv_xor_chan *chan,
124			       enum dma_transaction_type type)
125{
126	u32 op_mode;
127	u32 config = readl_relaxed(XOR_CONFIG(chan));
128
129	switch (type) {
130	case DMA_XOR:
131		op_mode = XOR_OPERATION_MODE_XOR;
132		break;
133	case DMA_MEMCPY:
134		op_mode = XOR_OPERATION_MODE_MEMCPY;
135		break;
136	default:
137		dev_err(mv_chan_to_devp(chan),
138			"error: unsupported operation %d\n",
139			type);
140		BUG();
141		return;
142	}
143
144	config &= ~0x7;
145	config |= op_mode;
146
147#if defined(__BIG_ENDIAN)
148	config |= XOR_DESCRIPTOR_SWAP;
149#else
150	config &= ~XOR_DESCRIPTOR_SWAP;
151#endif
152
153	writel_relaxed(config, XOR_CONFIG(chan));
154	chan->current_type = type;
155}
156
/* Start the engine on the currently programmed chain. */
static void mv_chan_activate(struct mv_xor_chan *chan)
{
	dev_dbg(mv_chan_to_devp(chan), " activate chan.\n");

	/*
	 * Deliberately the non-relaxed writel: all previously posted
	 * descriptor writes must be visible before the engine starts.
	 */
	writel(BIT(0), XOR_ACTIVATION(chan));
}
164
165static char mv_chan_is_busy(struct mv_xor_chan *chan)
166{
167	u32 state = readl_relaxed(XOR_ACTIVATION(chan));
168
169	state = (state >> 4) & 0x3;
170
171	return (state == 1) ? 1 : 0;
172}
173
174/**
175 * mv_xor_free_slots - flags descriptor slots for reuse
176 * @slot: Slot to free
177 * Caller must hold &mv_chan->lock while calling this function
178 */
179static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
180			      struct mv_xor_desc_slot *slot)
181{
182	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n",
183		__func__, __LINE__, slot);
184
185	slot->slot_used = 0;
186
187}
188
189/*
190 * mv_xor_start_new_chain - program the engine to operate on new chain headed by
191 * sw_desc
192 * Caller must hold &mv_chan->lock while calling this function
193 */
194static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
195				   struct mv_xor_desc_slot *sw_desc)
196{
197	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n",
198		__func__, __LINE__, sw_desc);
199
200	/* set the hardware chain */
201	mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
202
203	mv_chan->pending++;
204	mv_xor_issue_pending(&mv_chan->dmachan);
205}
206
207static dma_cookie_t
208mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
209	struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
210{
211	BUG_ON(desc->async_tx.cookie < 0);
212
213	if (desc->async_tx.cookie > 0) {
214		cookie = desc->async_tx.cookie;
215
216		/* call the callback (must not sleep or submit new
217		 * operations to this channel)
218		 */
219		if (desc->async_tx.callback)
220			desc->async_tx.callback(
221				desc->async_tx.callback_param);
222
223		dma_descriptor_unmap(&desc->async_tx);
224	}
225
226	/* run dependent operations */
227	dma_run_dependencies(&desc->async_tx);
228
229	return cookie;
230}
231
232static int
233mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
234{
235	struct mv_xor_desc_slot *iter, *_iter;
236
237	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
238	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
239				 completed_node) {
240
241		if (async_tx_test_ack(&iter->async_tx)) {
242			list_del(&iter->completed_node);
243			mv_xor_free_slots(mv_chan, iter);
244		}
245	}
246	return 0;
247}
248
249static int
250mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
251	struct mv_xor_chan *mv_chan)
252{
253	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n",
254		__func__, __LINE__, desc, desc->async_tx.flags);
255	list_del(&desc->chain_node);
256	/* the client is allowed to attach dependent operations
257	 * until 'ack' is set
258	 */
259	if (!async_tx_test_ack(&desc->async_tx)) {
260		/* move this slot to the completed_slots */
261		list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
262		return 0;
263	}
264
265	mv_xor_free_slots(mv_chan, desc);
266	return 0;
267}
268
/* This function must be called with the mv_xor_chan spinlock held.
 *
 * Walk the active chain from the oldest descriptor, retiring every one
 * the hardware has marked successful, then decide whether the engine
 * needs to be restarted: if it is idle but work remains, either the
 * current descriptor was cleaned (restart from the list head), or
 * descriptors are queued behind it (restart from the successor), or
 * cleanup is still pending (re-schedule the tasklet).
 */
static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
{
	struct mv_xor_desc_slot *iter, *_iter;
	dma_cookie_t cookie = 0;
	int busy = mv_chan_is_busy(mv_chan);
	u32 current_desc = mv_chan_get_current_desc(mv_chan);
	int current_cleaned = 0;
	struct mv_xor_desc *hw_desc;

	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
	dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc);
	mv_xor_clean_completed_slots(mv_chan);

	/* free completed slots from the chain starting with
	 * the oldest descriptor
	 */

	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
					chain_node) {

		/* clean finished descriptors */
		hw_desc = iter->hw_desc;
		if (hw_desc->status & XOR_DESC_SUCCESS) {
			cookie = mv_xor_run_tx_complete_actions(iter, mv_chan,
								cookie);

			/* done processing desc, clean slot */
			mv_xor_clean_slot(iter, mv_chan);

			/* break if we cleaned the current descriptor */
			if (iter->async_tx.phys == current_desc) {
				current_cleaned = 1;
				break;
			}
		} else {
			/* stop at the first unfinished descriptor */
			if (iter->async_tx.phys == current_desc) {
				current_cleaned = 0;
				break;
			}
		}
	}

	/* engine idle but work left: figure out where to restart */
	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
		if (current_cleaned) {
			/*
			 * current descriptor cleaned and removed, run
			 * from list head
			 */
			iter = list_entry(mv_chan->chain.next,
					  struct mv_xor_desc_slot,
					  chain_node);
			mv_xor_start_new_chain(mv_chan, iter);
		} else {
			if (!list_is_last(&iter->chain_node, &mv_chan->chain)) {
				/*
				 * descriptors are still waiting after
				 * current, trigger them
				 */
				iter = list_entry(iter->chain_node.next,
						  struct mv_xor_desc_slot,
						  chain_node);
				mv_xor_start_new_chain(mv_chan, iter);
			} else {
				/*
				 * some descriptors are still waiting
				 * to be cleaned
				 */
				tasklet_schedule(&mv_chan->irq_tasklet);
			}
		}
	}

	if (cookie > 0)
		mv_chan->dmachan.completed_cookie = cookie;
}
345
/* Bottom half: reap completed descriptors with the channel lock held. */
static void mv_xor_tasklet(unsigned long data)
{
	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;

	spin_lock_bh(&chan->lock);
	mv_xor_slot_cleanup(chan);
	spin_unlock_bh(&chan->lock);
}
354
/*
 * mv_xor_alloc_slot - grab a free software descriptor slot.
 * Caller holds &mv_chan->lock (see mv_xor_prep_dma_xor()).
 *
 * Returns the slot pre-acked, marked used, with cookie -EBUSY and a
 * cleared hardware chain link; returns NULL (after scheduling the
 * cleanup tasklet to recycle slots) when every slot is busy.
 */
static struct mv_xor_desc_slot *
mv_xor_alloc_slot(struct mv_xor_chan *mv_chan)
{
	struct mv_xor_desc_slot *iter, *_iter;
	int retry = 0;

	/* start search from the last allocated descriptor
	 * if a contiguous allocation can not be found start searching
	 * from the beginning of the list
	 */
retry:
	if (retry == 0)
		iter = mv_chan->last_used;
	else
		/* treat the list head itself as a sentinel so the
		 * _continue iteration below starts at the first entry
		 */
		iter = list_entry(&mv_chan->all_slots,
			struct mv_xor_desc_slot,
			slot_node);

	list_for_each_entry_safe_continue(
		iter, _iter, &mv_chan->all_slots, slot_node) {

		prefetch(_iter);
		prefetch(&_iter->async_tx);
		if (iter->slot_used) {
			/* give up after finding the first busy slot
			 * on the second pass through the list
			 */
			if (retry)
				break;
			continue;
		}

		/* pre-ack descriptor */
		async_tx_ack(&iter->async_tx);

		iter->slot_used = 1;
		INIT_LIST_HEAD(&iter->chain_node);
		iter->async_tx.cookie = -EBUSY;
		mv_chan->last_used = iter;
		mv_desc_clear_next_desc(iter);

		return iter;

	}
	if (!retry++)
		goto retry;

	/* try to free some slots if the allocation fails */
	tasklet_schedule(&mv_chan->irq_tasklet);

	return NULL;
}
407
408/************************ DMA engine API functions ****************************/
/*
 * mv_xor_tx_submit - queue a prepared descriptor on the channel.
 *
 * Assigns a cookie and appends the descriptor to the software chain.
 * A new hardware chain is started when the software chain was empty,
 * or when the engine is idle and already sitting on the old chain
 * tail; otherwise the descriptor is only linked behind the tail and
 * will be picked up by hardware chaining.
 */
static dma_cookie_t
mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
	struct mv_xor_desc_slot *old_chain_tail;
	dma_cookie_t cookie;
	int new_hw_chain = 1;

	dev_dbg(mv_chan_to_devp(mv_chan),
		"%s sw_desc %p: async_tx %p\n",
		__func__, sw_desc, &sw_desc->async_tx);

	spin_lock_bh(&mv_chan->lock);
	cookie = dma_cookie_assign(tx);

	if (list_empty(&mv_chan->chain))
		list_add_tail(&sw_desc->chain_node, &mv_chan->chain);
	else {
		new_hw_chain = 0;

		old_chain_tail = list_entry(mv_chan->chain.prev,
					    struct mv_xor_desc_slot,
					    chain_node);
		list_add_tail(&sw_desc->chain_node, &mv_chan->chain);

		dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n",
			&old_chain_tail->async_tx.phys);

		/* fix up the hardware chain */
		mv_desc_set_next_desc(old_chain_tail, sw_desc->async_tx.phys);

		/* if the channel is not busy */
		if (!mv_chan_is_busy(mv_chan)) {
			u32 current_desc = mv_chan_get_current_desc(mv_chan);
			/*
			 * and the current desc is the end of the chain before
			 * the append, then we need to start the channel
			 */
			if (current_desc == old_chain_tail->async_tx.phys)
				new_hw_chain = 1;
		}
	}

	if (new_hw_chain)
		mv_xor_start_new_chain(mv_chan, sw_desc);

	spin_unlock_bh(&mv_chan->lock);

	return cookie;
}
460
/* returns the number of allocated descriptors */
static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
{
	void *virt_desc;
	dma_addr_t dma_desc;
	int idx;
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	struct mv_xor_desc_slot *slot = NULL;
	/* the coherent pool is carved into fixed-size hw descriptors */
	int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE;

	/* Allocate descriptor slots: each software slot is kzalloc'd
	 * individually and points at one hardware descriptor inside
	 * the channel's coherent pool.
	 */
	idx = mv_chan->slots_allocated;
	while (idx < num_descs_in_pool) {
		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
		if (!slot) {
			/* partial initialization is tolerated */
			dev_info(mv_chan_to_devp(mv_chan),
				 "channel only initialized %d descriptor slots",
				 idx);
			break;
		}
		virt_desc = mv_chan->dma_desc_pool_virt;
		slot->hw_desc = virt_desc + idx * MV_XOR_SLOT_SIZE;

		dma_async_tx_descriptor_init(&slot->async_tx, chan);
		slot->async_tx.tx_submit = mv_xor_tx_submit;
		INIT_LIST_HEAD(&slot->chain_node);
		INIT_LIST_HEAD(&slot->slot_node);
		dma_desc = mv_chan->dma_desc_pool;
		slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE;
		slot->idx = idx++;

		/* only the shared-state update needs the channel lock */
		spin_lock_bh(&mv_chan->lock);
		mv_chan->slots_allocated = idx;
		list_add_tail(&slot->slot_node, &mv_chan->all_slots);
		spin_unlock_bh(&mv_chan->lock);
	}

	/* seed the allocator's search position (see mv_xor_alloc_slot) */
	if (mv_chan->slots_allocated && !mv_chan->last_used)
		mv_chan->last_used = list_entry(mv_chan->all_slots.next,
					struct mv_xor_desc_slot,
					slot_node);

	dev_dbg(mv_chan_to_devp(mv_chan),
		"allocated %d descriptor slots last_used: %p\n",
		mv_chan->slots_allocated, mv_chan->last_used);

	return mv_chan->slots_allocated ? : -ENOMEM;
}
509
510static struct dma_async_tx_descriptor *
511mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
512		    unsigned int src_cnt, size_t len, unsigned long flags)
513{
514	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
515	struct mv_xor_desc_slot *sw_desc;
516
517	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
518		return NULL;
519
520	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
521
522	dev_dbg(mv_chan_to_devp(mv_chan),
523		"%s src_cnt: %d len: %u dest %pad flags: %ld\n",
524		__func__, src_cnt, len, &dest, flags);
525
526	spin_lock_bh(&mv_chan->lock);
527	sw_desc = mv_xor_alloc_slot(mv_chan);
528	if (sw_desc) {
529		sw_desc->type = DMA_XOR;
530		sw_desc->async_tx.flags = flags;
531		mv_desc_init(sw_desc, dest, len, flags);
532		while (src_cnt--)
533			mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]);
534	}
535	spin_unlock_bh(&mv_chan->lock);
536	dev_dbg(mv_chan_to_devp(mv_chan),
537		"%s sw_desc %p async_tx %p \n",
538		__func__, sw_desc, &sw_desc->async_tx);
539	return sw_desc ? &sw_desc->async_tx : NULL;
540}
541
542static struct dma_async_tx_descriptor *
543mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
544		size_t len, unsigned long flags)
545{
546	/*
547	 * A MEMCPY operation is identical to an XOR operation with only
548	 * a single source address.
549	 */
550	return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
551}
552
553static struct dma_async_tx_descriptor *
554mv_xor_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
555{
556	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
557	dma_addr_t src, dest;
558	size_t len;
559
560	src = mv_chan->dummy_src_addr;
561	dest = mv_chan->dummy_dst_addr;
562	len = MV_XOR_MIN_BYTE_COUNT;
563
564	/*
565	 * We implement the DMA_INTERRUPT operation as a minimum sized
566	 * XOR operation with a single dummy source address.
567	 */
568	return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
569}
570
/* Release every descriptor slot of the channel.  Finished work is
 * reaped first; descriptors still on the chain or completed lists are
 * counted and reported as "in use" after the teardown.
 */
static void mv_xor_free_chan_resources(struct dma_chan *chan)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	struct mv_xor_desc_slot *iter, *_iter;
	int in_use_descs = 0;

	spin_lock_bh(&mv_chan->lock);

	/* retire whatever the hardware already finished */
	mv_xor_slot_cleanup(mv_chan);

	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
					chain_node) {
		in_use_descs++;
		list_del(&iter->chain_node);
	}
	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
				 completed_node) {
		in_use_descs++;
		list_del(&iter->completed_node);
	}
	/* free the software slots themselves (hw pool stays allocated) */
	list_for_each_entry_safe_reverse(
		iter, _iter, &mv_chan->all_slots, slot_node) {
		list_del(&iter->slot_node);
		kfree(iter);
		mv_chan->slots_allocated--;
	}
	mv_chan->last_used = NULL;

	dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n",
		__func__, mv_chan->slots_allocated);
	spin_unlock_bh(&mv_chan->lock);

	if (in_use_descs)
		dev_err(mv_chan_to_devp(mv_chan),
			"freeing %d in use descriptors!\n", in_use_descs);
}
607
608/**
609 * mv_xor_status - poll the status of an XOR transaction
610 * @chan: XOR channel handle
611 * @cookie: XOR transaction identifier
612 * @txstate: XOR transactions state holder (or NULL)
613 */
614static enum dma_status mv_xor_status(struct dma_chan *chan,
615					  dma_cookie_t cookie,
616					  struct dma_tx_state *txstate)
617{
618	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
619	enum dma_status ret;
620
621	ret = dma_cookie_status(chan, cookie, txstate);
622	if (ret == DMA_COMPLETE)
623		return ret;
624
625	spin_lock_bh(&mv_chan->lock);
626	mv_xor_slot_cleanup(mv_chan);
627	spin_unlock_bh(&mv_chan->lock);
628
629	return dma_cookie_status(chan, cookie, txstate);
630}
631
/* Dump the channel registers relevant for error diagnosis. */
static void mv_dump_xor_regs(struct mv_xor_chan *chan)
{
	dev_err(mv_chan_to_devp(chan), "config       0x%08x\n",
		readl_relaxed(XOR_CONFIG(chan)));
	dev_err(mv_chan_to_devp(chan), "activation   0x%08x\n",
		readl_relaxed(XOR_ACTIVATION(chan)));
	dev_err(mv_chan_to_devp(chan), "intr cause   0x%08x\n",
		readl_relaxed(XOR_INTR_CAUSE(chan)));
	dev_err(mv_chan_to_devp(chan), "intr mask    0x%08x\n",
		readl_relaxed(XOR_INTR_MASK(chan)));
	dev_err(mv_chan_to_devp(chan), "error cause  0x%08x\n",
		readl_relaxed(XOR_ERROR_CAUSE(chan)));
	dev_err(mv_chan_to_devp(chan), "error addr   0x%08x\n",
		readl_relaxed(XOR_ERROR_ADDR(chan)));
}
654
655static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
656					 u32 intr_cause)
657{
658	if (intr_cause & XOR_INT_ERR_DECODE) {
659		dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n");
660		return;
661	}
662
663	dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n",
664		chan->idx, intr_cause);
665
666	mv_dump_xor_regs(chan);
667	WARN_ON(1);
668}
669
/* Hard-irq handler: report errors, defer descriptor reaping to the
 * tasklet, then acknowledge the completion cause bits.
 */
static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
{
	struct mv_xor_chan *chan = data;
	u32 intr_cause = mv_chan_get_intr_cause(chan);

	dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause);

	if (intr_cause & XOR_INTR_ERRORS)
		mv_xor_err_interrupt_handler(chan, intr_cause);

	/* actual cleanup happens in mv_xor_tasklet() */
	tasklet_schedule(&chan->irq_tasklet);

	mv_xor_device_clear_eoc_cause(chan);

	return IRQ_HANDLED;
}
686
687static void mv_xor_issue_pending(struct dma_chan *chan)
688{
689	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
690
691	if (mv_chan->pending >= MV_XOR_THRESHOLD) {
692		mv_chan->pending = 0;
693		mv_chan_activate(mv_chan);
694	}
695}
696
697/*
698 * Perform a transaction to verify the HW works.
699 */
700
/* Run one PAGE_SIZE memcpy through the engine and compare buffers.
 * Returns 0 on success, -ENOMEM/-ENODEV on failure.
 */
static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan)
{
	int i, ret;
	void *src, *dest;
	dma_addr_t src_dma, dest_dma;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	struct dma_async_tx_descriptor *tx;
	struct dmaengine_unmap_data *unmap;
	int err = 0;

	src = kmalloc(sizeof(u8) * PAGE_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;

	dest = kzalloc(sizeof(u8) * PAGE_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < PAGE_SIZE; i++)
		((u8 *) src)[i] = (u8)i;

	dma_chan = &mv_chan->dmachan;
	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	/*
	 * NOTE(review): on this failure 'unmap' is NULL at the
	 * free_resources label -- relies on dmaengine_unmap_put()
	 * tolerating NULL; confirm against dmaengine core.
	 */
	unmap = dmaengine_get_unmap_data(dma_chan->device->dev, 2, GFP_KERNEL);
	if (!unmap) {
		err = -ENOMEM;
		goto free_resources;
	}

	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0,
				 PAGE_SIZE, DMA_TO_DEVICE);
	unmap->addr[0] = src_dma;

	ret = dma_mapping_error(dma_chan->device->dev, src_dma);
	if (ret) {
		err = -ENOMEM;
		goto free_resources;
	}
	unmap->to_cnt = 1;

	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0,
				  PAGE_SIZE, DMA_FROM_DEVICE);
	unmap->addr[1] = dest_dma;

	ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
	if (ret) {
		err = -ENOMEM;
		goto free_resources;
	}
	unmap->from_cnt = 1;
	unmap->len = PAGE_SIZE;

	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
				    PAGE_SIZE, 0);
	if (!tx) {
		dev_err(dma_chan->device->dev,
			"Self-test cannot prepare operation, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	cookie = mv_xor_tx_submit(tx);
	if (dma_submit_error(cookie)) {
		dev_err(dma_chan->device->dev,
			"Self-test submit error, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	mv_xor_issue_pending(dma_chan);
	async_tx_ack(tx);
	/* no completion wait: assumes the copy finishes within 1 ms */
	msleep(1);

	if (mv_xor_status(dma_chan, cookie, NULL) !=
	    DMA_COMPLETE) {
		dev_err(dma_chan->device->dev,
			"Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
				PAGE_SIZE, DMA_FROM_DEVICE);
	if (memcmp(src, dest, PAGE_SIZE)) {
		dev_err(dma_chan->device->dev,
			"Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	dmaengine_unmap_put(unmap);
	mv_xor_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}
807
808#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
/* XOR MV_XOR_NUM_SRC_TEST pages through the engine; each source page
 * is filled with a distinct bit pattern, so every destination byte
 * must equal the XOR of those patterns (cmp_byte).
 * Returns 0 on success, -ENOMEM/-ENODEV on failure.
 */
static int
mv_xor_xor_self_test(struct mv_xor_chan *mv_chan)
{
	int i, src_idx, ret;
	struct page *dest;
	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
	dma_addr_t dest_dma;
	struct dma_async_tx_descriptor *tx;
	struct dmaengine_unmap_data *unmap;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	u8 cmp_byte = 0;
	u32 cmp_word;
	int err = 0;
	int src_count = MV_XOR_NUM_SRC_TEST;

	for (src_idx = 0; src_idx < src_count; src_idx++) {
		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
		if (!xor_srcs[src_idx]) {
			/* unwind the pages allocated so far */
			while (src_idx--)
				__free_page(xor_srcs[src_idx]);
			return -ENOMEM;
		}
	}

	dest = alloc_page(GFP_KERNEL);
	if (!dest) {
		while (src_idx--)
			__free_page(xor_srcs[src_idx]);
		return -ENOMEM;
	}

	/* Fill in src buffers: source n holds pattern (1 << n) */
	for (src_idx = 0; src_idx < src_count; src_idx++) {
		u8 *ptr = page_address(xor_srcs[src_idx]);
		for (i = 0; i < PAGE_SIZE; i++)
			ptr[i] = (1 << src_idx);
	}

	/* expected result: XOR of all the source patterns */
	for (src_idx = 0; src_idx < src_count; src_idx++)
		cmp_byte ^= (u8) (1 << src_idx);

	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
		(cmp_byte << 8) | cmp_byte;

	memset(page_address(dest), 0, PAGE_SIZE);

	dma_chan = &mv_chan->dmachan;
	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	unmap = dmaengine_get_unmap_data(dma_chan->device->dev, src_count + 1,
					 GFP_KERNEL);
	if (!unmap) {
		err = -ENOMEM;
		goto free_resources;
	}

	/* test xor */
	for (i = 0; i < src_count; i++) {
		unmap->addr[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
					      0, PAGE_SIZE, DMA_TO_DEVICE);
		dma_srcs[i] = unmap->addr[i];
		ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[i]);
		if (ret) {
			err = -ENOMEM;
			goto free_resources;
		}
		unmap->to_cnt++;
	}

	unmap->addr[src_count] = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
				      DMA_FROM_DEVICE);
	dest_dma = unmap->addr[src_count];
	ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[src_count]);
	if (ret) {
		err = -ENOMEM;
		goto free_resources;
	}
	unmap->from_cnt = 1;
	unmap->len = PAGE_SIZE;

	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
				 src_count, PAGE_SIZE, 0);
	if (!tx) {
		dev_err(dma_chan->device->dev,
			"Self-test cannot prepare operation, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	cookie = mv_xor_tx_submit(tx);
	if (dma_submit_error(cookie)) {
		dev_err(dma_chan->device->dev,
			"Self-test submit error, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	mv_xor_issue_pending(dma_chan);
	async_tx_ack(tx);
	/* no completion wait: assumes the XOR finishes within 8 ms */
	msleep(8);

	if (mv_xor_status(dma_chan, cookie, NULL) !=
	    DMA_COMPLETE) {
		dev_err(dma_chan->device->dev,
			"Self-test xor timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
				PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i] != cmp_word) {
			dev_err(dma_chan->device->dev,
				"Self-test xor failed compare, disabling. index %d, data %x, expected %x\n",
				i, ptr[i], cmp_word);
			err = -ENODEV;
			goto free_resources;
		}
	}

free_resources:
	dmaengine_unmap_put(unmap);
	mv_xor_free_chan_resources(dma_chan);
out:
	src_idx = src_count;
	while (src_idx--)
		__free_page(xor_srcs[src_idx]);
	__free_page(dest);
	return err;
}
946
947static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
948{
949	struct dma_chan *chan, *_chan;
950	struct device *dev = mv_chan->dmadev.dev;
951
952	dma_async_device_unregister(&mv_chan->dmadev);
953
954	dma_free_coherent(dev, MV_XOR_POOL_SIZE,
955			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
956	dma_unmap_single(dev, mv_chan->dummy_src_addr,
957			 MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
958	dma_unmap_single(dev, mv_chan->dummy_dst_addr,
959			 MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
960
961	list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
962				 device_node) {
963		list_del(&chan->device_node);
964	}
965
966	free_irq(mv_chan->irq, mv_chan);
967
968	return 0;
969}
970
971static struct mv_xor_chan *
972mv_xor_channel_add(struct mv_xor_device *xordev,
973		   struct platform_device *pdev,
974		   int idx, dma_cap_mask_t cap_mask, int irq)
975{
976	int ret = 0;
977	struct mv_xor_chan *mv_chan;
978	struct dma_device *dma_dev;
979
980	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
981	if (!mv_chan)
982		return ERR_PTR(-ENOMEM);
983
984	mv_chan->idx = idx;
985	mv_chan->irq = irq;
986
987	dma_dev = &mv_chan->dmadev;
988
989	/*
990	 * These source and destination dummy buffers are used to implement
991	 * a DMA_INTERRUPT operation as a minimum-sized XOR operation.
992	 * Hence, we only need to map the buffers at initialization-time.
993	 */
994	mv_chan->dummy_src_addr = dma_map_single(dma_dev->dev,
995		mv_chan->dummy_src, MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
996	mv_chan->dummy_dst_addr = dma_map_single(dma_dev->dev,
997		mv_chan->dummy_dst, MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
998
999	/* allocate coherent memory for hardware descriptors
1000	 * note: writecombine gives slightly better performance, but
1001	 * requires that we explicitly flush the writes
1002	 */
1003	mv_chan->dma_desc_pool_virt =
1004	  dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE,
1005				 &mv_chan->dma_desc_pool, GFP_KERNEL);
1006	if (!mv_chan->dma_desc_pool_virt)
1007		return ERR_PTR(-ENOMEM);
1008
1009	/* discover transaction capabilites from the platform data */
1010	dma_dev->cap_mask = cap_mask;
1011
1012	INIT_LIST_HEAD(&dma_dev->channels);
1013
1014	/* set base routines */
1015	dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
1016	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
1017	dma_dev->device_tx_status = mv_xor_status;
1018	dma_dev->device_issue_pending = mv_xor_issue_pending;
1019	dma_dev->dev = &pdev->dev;
1020
1021	/* set prep routines based on capability */
1022	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1023		dma_dev->device_prep_dma_interrupt = mv_xor_prep_dma_interrupt;
1024	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
1025		dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
1026	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
1027		dma_dev->max_xor = 8;
1028		dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
1029	}
1030
1031	mv_chan->mmr_base = xordev->xor_base;
1032	mv_chan->mmr_high_base = xordev->xor_high_base;
1033	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
1034		     mv_chan);
1035
1036	/* clear errors before enabling interrupts */
1037	mv_xor_device_clear_err_status(mv_chan);
1038
1039	ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler,
1040			  0, dev_name(&pdev->dev), mv_chan);
1041	if (ret)
1042		goto err_free_dma;
1043
1044	mv_chan_unmask_interrupts(mv_chan);
1045
1046	mv_set_mode(mv_chan, DMA_XOR);
1047
1048	spin_lock_init(&mv_chan->lock);
1049	INIT_LIST_HEAD(&mv_chan->chain);
1050	INIT_LIST_HEAD(&mv_chan->completed_slots);
1051	INIT_LIST_HEAD(&mv_chan->all_slots);
1052	mv_chan->dmachan.device = dma_dev;
1053	dma_cookie_init(&mv_chan->dmachan);
1054
1055	list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels);
1056
1057	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
1058		ret = mv_xor_memcpy_self_test(mv_chan);
1059		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
1060		if (ret)
1061			goto err_free_irq;
1062	}
1063
1064	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
1065		ret = mv_xor_xor_self_test(mv_chan);
1066		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1067		if (ret)
1068			goto err_free_irq;
1069	}
1070
1071	dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n",
1072		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1073		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
1074		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
1075
1076	dma_async_device_register(dma_dev);
1077	return mv_chan;
1078
1079err_free_irq:
1080	free_irq(mv_chan->irq, mv_chan);
1081 err_free_dma:
1082	dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE,
1083			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
1084	return ERR_PTR(ret);
1085}
1086
/* Program the XOR engine's MBUS address-decoding windows from the
 * SoC DRAM layout: clear all eight windows, then map one window per
 * DRAM chip select and enable it for both engine register sets.
 */
static void
mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
			 const struct mbus_dram_target_info *dram)
{
	void __iomem *base = xordev->xor_high_base;
	u32 win_enable = 0;
	int i;

	/* start from a clean slate: disable and clear every window */
	for (i = 0; i < 8; i++) {
		writel(0, base + WINDOW_BASE(i));
		writel(0, base + WINDOW_SIZE(i));
		if (i < 4)
			writel(0, base + WINDOW_REMAP_HIGH(i));
	}

	for (i = 0; i < dram->num_cs; i++) {
		const struct mbus_dram_window *cs = dram->cs + i;

		writel((cs->base & 0xffff0000) |
		       (cs->mbus_attr << 8) |
		       dram->mbus_dram_target_id, base + WINDOW_BASE(i));
		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));

		/* enable the window and grant it read+write access */
		win_enable |= (1 << i);
		win_enable |= 3 << (16 + (2 * i));
	}

	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
	writel(0, base + WINDOW_OVERRIDE_CTRL(0));
	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
}
1119
1120static int mv_xor_probe(struct platform_device *pdev)
1121{
1122	const struct mbus_dram_target_info *dram;
1123	struct mv_xor_device *xordev;
1124	struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
1125	struct resource *res;
1126	int i, ret;
1127
1128	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
1129
1130	xordev = devm_kzalloc(&pdev->dev, sizeof(*xordev), GFP_KERNEL);
1131	if (!xordev)
1132		return -ENOMEM;
1133
1134	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1135	if (!res)
1136		return -ENODEV;
1137
1138	xordev->xor_base = devm_ioremap(&pdev->dev, res->start,
1139					resource_size(res));
1140	if (!xordev->xor_base)
1141		return -EBUSY;
1142
1143	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
1144	if (!res)
1145		return -ENODEV;
1146
1147	xordev->xor_high_base = devm_ioremap(&pdev->dev, res->start,
1148					     resource_size(res));
1149	if (!xordev->xor_high_base)
1150		return -EBUSY;
1151
1152	platform_set_drvdata(pdev, xordev);
1153
1154	/*
1155	 * (Re-)program MBUS remapping windows if we are asked to.
1156	 */
1157	dram = mv_mbus_dram_info();
1158	if (dram)
1159		mv_xor_conf_mbus_windows(xordev, dram);
1160
1161	/* Not all platforms can gate the clock, so it is not
1162	 * an error if the clock does not exists.
1163	 */
1164	xordev->clk = clk_get(&pdev->dev, NULL);
1165	if (!IS_ERR(xordev->clk))
1166		clk_prepare_enable(xordev->clk);
1167
1168	if (pdev->dev.of_node) {
1169		struct device_node *np;
1170		int i = 0;
1171
1172		for_each_child_of_node(pdev->dev.of_node, np) {
1173			struct mv_xor_chan *chan;
1174			dma_cap_mask_t cap_mask;
1175			int irq;
1176
1177			dma_cap_zero(cap_mask);
1178			if (of_property_read_bool(np, "dmacap,memcpy"))
1179				dma_cap_set(DMA_MEMCPY, cap_mask);
1180			if (of_property_read_bool(np, "dmacap,xor"))
1181				dma_cap_set(DMA_XOR, cap_mask);
1182			if (of_property_read_bool(np, "dmacap,interrupt"))
1183				dma_cap_set(DMA_INTERRUPT, cap_mask);
1184
1185			irq = irq_of_parse_and_map(np, 0);
1186			if (!irq) {
1187				ret = -ENODEV;
1188				goto err_channel_add;
1189			}
1190
1191			chan = mv_xor_channel_add(xordev, pdev, i,
1192						  cap_mask, irq);
1193			if (IS_ERR(chan)) {
1194				ret = PTR_ERR(chan);
1195				irq_dispose_mapping(irq);
1196				goto err_channel_add;
1197			}
1198
1199			xordev->channels[i] = chan;
1200			i++;
1201		}
1202	} else if (pdata && pdata->channels) {
1203		for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
1204			struct mv_xor_channel_data *cd;
1205			struct mv_xor_chan *chan;
1206			int irq;
1207
1208			cd = &pdata->channels[i];
1209			if (!cd) {
1210				ret = -ENODEV;
1211				goto err_channel_add;
1212			}
1213
1214			irq = platform_get_irq(pdev, i);
1215			if (irq < 0) {
1216				ret = irq;
1217				goto err_channel_add;
1218			}
1219
1220			chan = mv_xor_channel_add(xordev, pdev, i,
1221						  cd->cap_mask, irq);
1222			if (IS_ERR(chan)) {
1223				ret = PTR_ERR(chan);
1224				goto err_channel_add;
1225			}
1226
1227			xordev->channels[i] = chan;
1228		}
1229	}
1230
1231	return 0;
1232
1233err_channel_add:
1234	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++)
1235		if (xordev->channels[i]) {
1236			mv_xor_channel_remove(xordev->channels[i]);
1237			if (pdev->dev.of_node)
1238				irq_dispose_mapping(xordev->channels[i]->irq);
1239		}
1240
1241	if (!IS_ERR(xordev->clk)) {
1242		clk_disable_unprepare(xordev->clk);
1243		clk_put(xordev->clk);
1244	}
1245
1246	return ret;
1247}
1248
1249static int mv_xor_remove(struct platform_device *pdev)
1250{
1251	struct mv_xor_device *xordev = platform_get_drvdata(pdev);
1252	int i;
1253
1254	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
1255		if (xordev->channels[i])
1256			mv_xor_channel_remove(xordev->channels[i]);
1257	}
1258
1259	if (!IS_ERR(xordev->clk)) {
1260		clk_disable_unprepare(xordev->clk);
1261		clk_put(xordev->clk);
1262	}
1263
1264	return 0;
1265}
1266
#ifdef CONFIG_OF
/* Device-tree match table: binds this driver to "marvell,orion-xor" nodes. */
static const struct of_device_id mv_xor_dt_ids[] = {
	{ .compatible = "marvell,orion-xor", },
	{},
};
MODULE_DEVICE_TABLE(of, mv_xor_dt_ids);
#endif
1274
/*
 * Platform driver glue.  The of_match_table compiles away to NULL when
 * CONFIG_OF is disabled (via of_match_ptr), matching the #ifdef around
 * mv_xor_dt_ids above.
 */
static struct platform_driver mv_xor_driver = {
	.probe		= mv_xor_probe,
	.remove		= mv_xor_remove,
	.driver		= {
		.name	        = MV_XOR_NAME,
		.of_match_table = of_match_ptr(mv_xor_dt_ids),
	},
};
1283
1284
/* Register the platform driver at module load time. */
static int __init mv_xor_init(void)
{
	return platform_driver_register(&mv_xor_driver);
}
module_init(mv_xor_init);

/*
 * It's currently unsafe to unload this module, so no module_exit() is
 * provided and the unregister path below is intentionally compiled out.
 * (This is also why module_platform_driver() is not used here: it would
 * generate an exit path.)
 */
#if 0
static void __exit mv_xor_exit(void)
{
	platform_driver_unregister(&mv_xor_driver);
	return;
}

module_exit(mv_xor_exit);
#endif

MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
MODULE_LICENSE("GPL");
1305