/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/ccp.h>
#include <linux/scatterlist.h>
#include <crypto/scatterwalk.h>
#include <crypto/sha.h>

#include "ccp-dev.h"

enum ccp_memtype {
	CCP_MEMTYPE_SYSTEM = 0,
	CCP_MEMTYPE_KSB,
	CCP_MEMTYPE_LOCAL,
	CCP_MEMTYPE__LAST,
};

struct ccp_dma_info {
	dma_addr_t address;
	unsigned int offset;
	unsigned int length;
	enum dma_data_direction dir;
};

struct ccp_dm_workarea {
	struct device *dev;
	struct dma_pool *dma_pool;
	unsigned int length;

	u8 *address;
	struct ccp_dma_info dma;
};

struct ccp_sg_workarea {
	struct scatterlist *sg;
	unsigned int nents;
	unsigned int length;

	struct scatterlist *dma_sg;
	struct device *dma_dev;
	unsigned int dma_count;
	enum dma_data_direction dma_dir;

	unsigned int sg_used;

	u64 bytes_left;
};

struct ccp_data {
	struct ccp_sg_workarea sg_wa;
	struct ccp_dm_workarea dm_wa;
};

struct ccp_mem {
	enum ccp_memtype type;
	union {
		struct ccp_dma_info dma;
		u32 ksb;
	} u;
};

struct ccp_aes_op {
	enum ccp_aes_type type;
	enum ccp_aes_mode mode;
	enum ccp_aes_action action;
};

struct ccp_xts_aes_op {
	enum ccp_aes_action action;
	enum ccp_xts_aes_unit_size unit_size;
};

struct ccp_sha_op {
	enum ccp_sha_type type;
	u64 msg_bits;
};

struct ccp_rsa_op {
	u32 mod_size;
	u32 input_len;
};

struct ccp_passthru_op {
	enum ccp_passthru_bitwise bit_mod;
	enum ccp_passthru_byteswap byte_swap;
};

struct ccp_ecc_op {
	enum ccp_ecc_function function;
};

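/* A single unit of work for the device: the command queue it runs on, the
 * job id it is tracked by, the KSB entries holding its key and context,
 * the source and destination memory descriptors, and the engine-specific
 * parameters.
 */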
struct ccp_op {
	struct ccp_cmd_queue *cmd_q;

	u32 jobid;
	u32 ioc;
	u32 soc;
	u32 ksb_key;
	u32 ksb_ctx;
	u32 init;
	u32 eom;

	struct ccp_mem src;
	struct ccp_mem dst;

	union {
		struct ccp_aes_op aes;
		struct ccp_xts_aes_op xts;
		struct ccp_sha_op sha;
		struct ccp_rsa_op rsa;
		struct ccp_passthru_op passthru;
		struct ccp_ecc_op ecc;
	} u;
};

/* SHA initial context values */
static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
	cpu_to_be32(SHA1_H4), 0, 0, 0,
};

static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};

static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};

/* The CCP cannot perform zero-length sha operations so the caller
 * is required to buffer data for the final operation.  However, a
 * sha operation for a message with a total length of zero is valid
 * so known values are required to supply the result.
 */
static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
};

static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
};

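/* A DMA address is split across two request register fields: the low
 * 32 bits and a high word that is masked down to 16 bits, giving a
 * 48-bit device address.
 */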
static u32 ccp_addr_lo(struct ccp_dma_info *info)
{
	return lower_32_bits(info->address + info->offset);
}

static u32 ccp_addr_hi(struct ccp_dma_info *info)
{
	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
}

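/* Submit an operation to a command queue: the engine-specific values are
 * written to CMD_REQ1 through CMD_REQx and then, under the request mutex,
 * CMD_REQ0 is written to start the command.  If an interrupt was requested
 * (explicitly, or because the queue is full) wait for completion, deleting
 * the related jobs from the queue on error or stop-on-complete.
 */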
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
	struct ccp_cmd_queue *cmd_q = op->cmd_q;
	struct ccp_device *ccp = cmd_q->ccp;
	void __iomem *cr_addr;
	u32 cr0, cmd;
	unsigned int i;
	int ret = 0;

	/* We could read a status register to see how many free slots
	 * are actually available, but reading that register resets it
	 * and you could lose some error information.
	 */
	cmd_q->free_slots--;

	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
	      | (op->jobid << REQ0_JOBID_SHIFT)
	      | REQ0_WAIT_FOR_WRITE;

	if (op->soc)
		cr0 |= REQ0_STOP_ON_COMPLETE
		       | REQ0_INT_ON_COMPLETE;

	if (op->ioc || !cmd_q->free_slots)
		cr0 |= REQ0_INT_ON_COMPLETE;

	/* Start at CMD_REQ1 */
	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

	mutex_lock(&ccp->req_mutex);

	/* Write CMD_REQ1 through CMD_REQx first */
	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
		iowrite32(*(cr + i), cr_addr);

	/* Tell the CCP to start */
	wmb();
	iowrite32(cr0, ccp->io_regs + CMD_REQ0);

	mutex_unlock(&ccp->req_mutex);

	if (cr0 & REQ0_INT_ON_COMPLETE) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* On error delete all related jobs from the queue */
			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

			if (!ret)
				ret = -EIO;
		} else if (op->soc) {
			/* Delete just head job from the queue on SoC */
			cmd = DEL_Q_ACTIVE
			      | (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
		}

		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

		cmd_q->int_rcvd = 0;
	}

	return ret;
}

static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
		| REQ1_INIT;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);

	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	} else {
		cr[4] = 0;
		cr[5] = 0;
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_rsa(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
		| REQ1_EOM;
	cr[1] = op->u.rsa.input_len - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_passthru(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

	if (op->src.type == CCP_MEMTYPE_SYSTEM)
		cr[1] = op->src.u.dma.length - 1;
	else
		cr[1] = op->dst.u.dma.length - 1;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		cr[2] = ccp_addr_lo(&op->src.u.dma);
		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->src.u.dma);

		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
	} else {
		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		cr[4] = ccp_addr_lo(&op->dst.u.dma);
		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->dst.u.dma);
	} else {
		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
	}

	if (op->eom)
		cr[0] |= REQ1_EOM;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

static int ccp_perform_ecc(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = REQ1_ECC_AFFINE_CONVERT
		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
		| REQ1_EOM;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

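/* Allocate a contiguous run of KSB entries from the device-wide bitmap.
 * If no run of the requested size is free, sleep until entries are
 * released; a return value of 0 means the wait was interrupted.
 */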
static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
{
	int start;

	for (;;) {
		mutex_lock(&ccp->ksb_mutex);

		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
							ccp->ksb_count,
							ccp->ksb_start,
							count, 0);
		if (start <= ccp->ksb_count) {
			bitmap_set(ccp->ksb, start, count);

			mutex_unlock(&ccp->ksb_mutex);
			break;
		}

		ccp->ksb_avail = 0;

		mutex_unlock(&ccp->ksb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
			return 0;
	}

	return KSB_START + start;
}

static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
			 unsigned int count)
{
	if (!start)
		return;

	mutex_lock(&ccp->ksb_mutex);

	bitmap_clear(ccp->ksb, start - KSB_START, count);

	ccp->ksb_avail = 1;

	mutex_unlock(&ccp->ksb_mutex);

	wake_up_interruptible_all(&ccp->ksb_queue);
}

static u32 ccp_gen_jobid(struct ccp_device *ccp)
{
	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
}

static void ccp_sg_free(struct ccp_sg_workarea *wa)
{
	if (wa->dma_count)
		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);

	wa->dma_count = 0;
}

static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
				struct scatterlist *sg, u64 len,
				enum dma_data_direction dma_dir)
{
	memset(wa, 0, sizeof(*wa));

	wa->sg = sg;
	if (!sg)
		return 0;

	wa->nents = sg_nents(sg);
	wa->length = sg->length;
	wa->bytes_left = len;
	wa->sg_used = 0;

	if (len == 0)
		return 0;

	if (dma_dir == DMA_NONE)
		return 0;

	wa->dma_sg = sg;
	wa->dma_dev = dev;
	wa->dma_dir = dma_dir;
	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
	if (!wa->dma_count)
		return -ENOMEM;

	return 0;
}

static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
	unsigned int nbytes = min_t(u64, len, wa->bytes_left);

	if (!wa->sg)
		return;

	wa->sg_used += nbytes;
	wa->bytes_left -= nbytes;
	if (wa->sg_used == wa->sg->length) {
		wa->sg = sg_next(wa->sg);
		wa->sg_used = 0;
	}
}

static void ccp_dm_free(struct ccp_dm_workarea *wa)
{
	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
		if (wa->address)
			dma_pool_free(wa->dma_pool, wa->address,
				      wa->dma.address);
	} else {
		if (wa->dma.address)
			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
					 wa->dma.dir);
		kfree(wa->address);
	}

	wa->address = NULL;
	wa->dma.address = 0;
}

static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
				struct ccp_cmd_queue *cmd_q,
				unsigned int len,
				enum dma_data_direction dir)
{
	memset(wa, 0, sizeof(*wa));

	if (!len)
		return 0;

	wa->dev = cmd_q->ccp->dev;
	wa->length = len;

	if (len <= CCP_DMAPOOL_MAX_SIZE) {
		wa->dma_pool = cmd_q->dma_pool;

		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
					     &wa->dma.address);
		if (!wa->address)
			return -ENOMEM;

		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;

		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
	} else {
		wa->address = kzalloc(len, GFP_KERNEL);
		if (!wa->address)
			return -ENOMEM;

		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
						 dir);
		if (!wa->dma.address)
			return -ENOMEM;

		wa->dma.length = len;
	}
	wa->dma.dir = dir;

	return 0;
}

static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
			    struct scatterlist *sg, unsigned int sg_offset,
			    unsigned int len)
{
	WARN_ON(!wa->address);

	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
				 0);
}

static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
			    struct scatterlist *sg, unsigned int sg_offset,
			    unsigned int len)
{
	WARN_ON(!wa->address);

	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
				 1);
}

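/* Reverse-copy a big endian buffer from a scatterlist into the workarea:
 * the data is walked from the end in se_len sized chunks and each chunk
 * is byte-reversed, optionally sign-extending a short final chunk.  This
 * produces the little endian layout the CCP expects for RSA/ECC operands.
 */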
static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
				    struct scatterlist *sg,
				    unsigned int len, unsigned int se_len,
				    bool sign_extend)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	BUG_ON(se_len > sizeof(buffer));

	sg_offset = len;
	dm_offset = 0;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, se_len);
		sg_offset -= ksb_len;

		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
		for (i = 0; i < ksb_len; i++)
			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];

		dm_offset += ksb_len;
		nbytes -= ksb_len;

		if ((ksb_len != se_len) && sign_extend) {
			/* Must sign-extend to nearest sign-extend length */
			if (wa->address[dm_offset - 1] & 0x80)
				memset(wa->address + dm_offset, 0xff,
				       se_len - ksb_len);
		}
	}
}

static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
				    struct scatterlist *sg,
				    unsigned int len)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	sg_offset = 0;
	dm_offset = len;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
		dm_offset -= ksb_len;

		for (i = 0; i < ksb_len; i++)
			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);

		sg_offset += ksb_len;
		nbytes -= ksb_len;
	}
}

static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
{
	ccp_dm_free(&data->dm_wa);
	ccp_sg_free(&data->sg_wa);
}

static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
			 struct scatterlist *sg, u64 sg_len,
			 unsigned int dm_len,
			 enum dma_data_direction dir)
{
	int ret;

	memset(data, 0, sizeof(*data));

	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
				   dir);
	if (ret)
		goto e_err;

	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
	if (ret)
		goto e_err;

	return 0;

e_err:
	ccp_free_data(data, cmd_q);

	return ret;
}

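/* Move data between the caller's scatterlist and the bounce buffer in the
 * dm workarea: from == 0 clears the buffer and fills it from the
 * scatterlist, from == 1 drains the buffer back out to the scatterlist.
 * Returns the number of bytes accounted against the scatterlist workarea.
 */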
static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
{
	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
	unsigned int buf_count, nbytes;

	/* Clear the buffer if setting it */
	if (!from)
		memset(dm_wa->address, 0, dm_wa->length);

	if (!sg_wa->sg)
		return 0;

	/* Perform the copy operation
	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
	 *   an unsigned int
	 */
	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
				 nbytes, from);

	/* Update the structures and generate the count */
	buf_count = 0;
	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
			     dm_wa->length - buf_count);
		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);

		buf_count += nbytes;
		ccp_update_sg_workarea(sg_wa, nbytes);
	}

	return buf_count;
}

static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 0);
}

static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 1);
}

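/* Select the source and destination DMA areas for the next operation.
 * When the current scatterlist element holds less than block_size bytes
 * the data is staged through the dm workarea bounce buffer instead, and
 * the operation is flagged stop-on-complete so the buffer can be reused.
 */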
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op, unsigned int block_size,
			     bool blocksize_op)
{
	unsigned int sg_src_len, sg_dst_len, op_len;

	/* The CCP can only DMA from/to one source and one destination
	 * address per operation. This requires that we find the smallest
	 * DMA area between the source and destination. The resulting len
	 * values will always be <= UINT_MAX because the dma length is an
	 * unsigned int.
	 */
	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);

	if (dst) {
		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
		op_len = min(sg_src_len, sg_dst_len);
	} else {
		op_len = sg_src_len;
	}

	/* The data operation length will be at least block_size in length
	 * or the smaller of available sg room remaining for the source or
	 * the destination
	 */
	op_len = max(op_len, block_size);

	/* Unless we have to buffer data, there's no reason to wait */
	op->soc = 0;

	if (sg_src_len < block_size) {
		/* Not enough data in the sg element, so it
		 * needs to be buffered into a blocksize chunk
		 */
		int cp_len = ccp_fill_queue_buf(src);

		op->soc = 1;
		op->src.u.dma.address = src->dm_wa.dma.address;
		op->src.u.dma.offset = 0;
		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
	} else {
		/* Enough data in the sg element, but we need to
		 * adjust for any previously copied data
		 */
		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
		op->src.u.dma.offset = src->sg_wa.sg_used;
		op->src.u.dma.length = op_len & ~(block_size - 1);

		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
	}

	if (dst) {
		if (sg_dst_len < block_size) {
			/* Not enough room in the sg element or we're on the
			 * last piece of data (when using padding), so the
			 * output needs to be buffered into a blocksize chunk
			 */
			op->soc = 1;
			op->dst.u.dma.address = dst->dm_wa.dma.address;
			op->dst.u.dma.offset = 0;
			op->dst.u.dma.length = op->src.u.dma.length;
		} else {
			/* Enough room in the sg element, but we need to
			 * adjust for any previously used area
			 */
			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
			op->dst.u.dma.offset = dst->sg_wa.sg_used;
			op->dst.u.dma.length = op->src.u.dma.length;
		}
	}
}

static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op)
{
	op->init = 0;

	if (dst) {
		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
			ccp_empty_queue_buf(dst);
		else
			ccp_update_sg_workarea(&dst->sg_wa,
					       op->dst.u.dma.length);
	}
}

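/* Copy data between a dm workarea and a KSB entry using the passthru
 * engine, applying the requested byte swap.  Copies out of the KSB are
 * marked stop-on-complete so the data is valid when the call returns.
 */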
static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
				u32 byte_swap, bool from)
{
	struct ccp_op op;

	memset(&op, 0, sizeof(op));

	op.cmd_q = cmd_q;
	op.jobid = jobid;
	op.eom = 1;

	if (from) {
		op.soc = 1;
		op.src.type = CCP_MEMTYPE_KSB;
		op.src.u.ksb = ksb;
		op.dst.type = CCP_MEMTYPE_SYSTEM;
		op.dst.u.dma.address = wa->dma.address;
		op.dst.u.dma.length = wa->length;
	} else {
		op.src.type = CCP_MEMTYPE_SYSTEM;
		op.src.u.dma.address = wa->dma.address;
		op.src.u.dma.length = wa->length;
		op.dst.type = CCP_MEMTYPE_KSB;
		op.dst.u.ksb = ksb;
	}

	op.u.passthru.byte_swap = byte_swap;

	return ccp_perform_passthru(&op);
}

static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
			   u32 byte_swap)
{
	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
}

static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
			     u32 byte_swap)
{
	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
}

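/* AES-CMAC: the key and IV are loaded into the queue's KSB entries and
 * the message is fed to the AES engine a block at a time.  For the final
 * block the caller-supplied K1/K2 subkey replaces the context in the KSB
 * before the end-of-message operation is issued.
 */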
static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
						op.ksb_ctx,
						CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					aes->cmac_key_len);
			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					      CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}

static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	if (aes->mode == CCP_AES_MODE_CMAC)
		return ccp_run_aes_cmac_cmd(cmd_q, cmd);

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC) ||
	     (aes->mode == CCP_AES_MODE_CFB)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				      CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}

static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
			       struct ccp_cmd *cmd)
{
	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int unit_size, dm_offset;
	bool in_place = false;
	int ret;

	switch (xts->unit_size) {
	case CCP_XTS_AES_UNIT_SIZE_16:
		unit_size = 16;
		break;
	case CCP_XTS_AES_UNIT_SIZE_512:
		unit_size = 512;
		break;
	case CCP_XTS_AES_UNIT_SIZE_1024:
		unit_size = 1024;
		break;
	case CCP_XTS_AES_UNIT_SIZE_2048:
		unit_size = 2048;
		break;
	case CCP_XTS_AES_UNIT_SIZE_4096:
		unit_size = 4096;
		break;

	default:
		return -EINVAL;
	}

	if (xts->key_len != AES_KEYSIZE_128)
		return -EINVAL;

	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (xts->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
		return -EINVAL;

	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.xts.action = xts->action;
	op.u.xts.unit_size = xts->unit_size;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * for XTS is already in little endian format so no byte swapping
	 * is needed.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(xts->src) == sg_virt(xts->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
			    unit_size,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
				    unit_size, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, unit_size, true);
		if (!src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_xts_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}

static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_sha_engine *sha = &cmd->u.sha;
	struct ccp_dm_workarea ctx;
	struct ccp_data src;
	struct ccp_op op;
	int ret;

	if (sha->ctx_len != CCP_SHA_CTXSIZE)
		return -EINVAL;

	if (!sha->ctx)
		return -EINVAL;

	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!sha->src_len) {
		const u8 *sha_zero;

		/* Not final, just return */
		if (!sha->final)
			return 0;

		/* CCP can't do a zero length sha operation so the caller
		 * must buffer the data.
		 */
		if (sha->msg_bits)
			return -EINVAL;

		/* A sha operation for a message with a total length of zero,
		 * return known result.
		 */
		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			sha_zero = ccp_sha1_zero;
			break;
		case CCP_SHA_TYPE_224:
			sha_zero = ccp_sha224_zero;
			break;
		case CCP_SHA_TYPE_256:
			sha_zero = ccp_sha256_zero;
			break;
		default:
			return -EINVAL;
		}

		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
					 sha->ctx_len, 1);

		return 0;
	}

	if (!sha->src)
		return -EINVAL;

	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.u.sha.type = sha->type;
	op.u.sha.msg_bits = sha->msg_bits;

	/* The SHA context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		return ret;

	if (sha->first) {
		const __be32 *init;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			init = ccp_sha1_init;
			break;
		case CCP_SHA_TYPE_224:
			init = ccp_sha224_init;
			break;
		case CCP_SHA_TYPE_256:
			init = ccp_sha256_init;
			break;
		default:
			ret = -EINVAL;
			goto e_ctx;
		}
		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
	} else {
		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
	}

	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP SHA engine */
	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
		if (sha->final && !src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_sha(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_data;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the SHA context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_data;
	}

	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

	if (sha->final && sha->opad) {
		/* HMAC operation, recursively perform final SHA */
		struct ccp_cmd hmac_cmd;
		struct scatterlist sg;
		u64 block_size, digest_size;
		u8 *hmac_buf;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			block_size = SHA1_BLOCK_SIZE;
			digest_size = SHA1_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_224:
			block_size = SHA224_BLOCK_SIZE;
			digest_size = SHA224_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_256:
			block_size = SHA256_BLOCK_SIZE;
			digest_size = SHA256_DIGEST_SIZE;
			break;
		default:
			ret = -EINVAL;
			goto e_data;
		}

		if (sha->opad_len != block_size) {
			ret = -EINVAL;
			goto e_data;
		}

		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
		if (!hmac_buf) {
			ret = -ENOMEM;
			goto e_data;
		}
		sg_init_one(&sg, hmac_buf, block_size + digest_size);

		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
		memcpy(hmac_buf + block_size, ctx.address, digest_size);

		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
		hmac_cmd.engine = CCP_ENGINE_SHA;
		hmac_cmd.u.sha.type = sha->type;
		hmac_cmd.u.sha.ctx = sha->ctx;
		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
		hmac_cmd.u.sha.src = &sg;
		hmac_cmd.u.sha.src_len = block_size + digest_size;
		hmac_cmd.u.sha.opad = NULL;
		hmac_cmd.u.sha.opad_len = 0;
		hmac_cmd.u.sha.first = 1;
		hmac_cmd.u.sha.final = 1;
		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;

		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
		if (ret)
			cmd->engine_error = hmac_cmd.engine_error;

		kfree(hmac_buf);
	}

e_data:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

	return ret;
}

static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
	struct ccp_dm_workarea exp, src;
	struct ccp_data dst;
	struct ccp_op op;
	unsigned int ksb_count, i_len, o_len;
	int ret;

	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
		return -EINVAL;

	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
		return -EINVAL;

	/* The RSA modulus must precede the message being acted upon, so
	 * it must be copied to a DMA area where the message and the
	 * modulus can be concatenated.  Therefore the input buffer
	 * length required is twice the output buffer length (which
	 * must be a multiple of 256-bits).
	 */
	o_len = ((rsa->key_size + 255) / 256) * 32;
	i_len = o_len * 2;

	ksb_count = o_len / CCP_KSB_BYTES;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
	if (!op.ksb_key)
		return -EIO;

	/* The RSA exponent may span multiple (32-byte) KSB entries and must
	 * be in little endian format. Reverse copy each 32-byte chunk
	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
	 * and each byte within that chunk and do not perform any byte swap
	 * operations on the passthru operation.
	 */
	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
	if (ret)
		goto e_ksb;

	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
				false);
	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_exp;
	}

	/* Concatenate the modulus and the message. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
	if (ret)
		goto e_exp;

	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
				false);
	src.address += o_len;	/* Adjust the address for the copy operation */
	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
				false);
	src.address -= o_len;	/* Reset the address to original value */

	/* Prepare the output area for the operation */
	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
			    o_len, DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = i_len;
	op.dst.u.dma.address = dst.dm_wa.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = o_len;

	op.u.rsa.mod_size = rsa->key_size;
	op.u.rsa.input_len = i_len;

	ret = ccp_perform_rsa(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);

e_dst:
	ccp_free_data(&dst, cmd_q);

e_src:
	ccp_dm_free(&src);

e_exp:
	ccp_dm_free(&exp);

e_ksb:
	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);

	return ret;
}

static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_passthru_engine *pt = &cmd->u.passthru;
	struct ccp_dm_workarea mask;
	struct ccp_data src, dst;
	struct ccp_op op;
	bool in_place = false;
	unsigned int i;
	int ret;

	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!pt->src || !pt->dst)
		return -EINVAL;

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
			return -EINVAL;
		if (!pt->mask)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		/* Load the mask */
		op.ksb_key = cmd_q->ksb_key;

		ret = ccp_init_dm_workarea(&mask, cmd_q,
					   CCP_PASSTHRU_KSB_COUNT *
					   CCP_KSB_BYTES,
					   DMA_TO_DEVICE);
		if (ret)
			return ret;

		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
				      CCP_PASSTHRU_BYTESWAP_NOOP);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_mask;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(pt->src) == sg_virt(pt->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
			    CCP_PASSTHRU_MASKSIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_mask;

	if (in_place) {
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP Passthru engine
	 *   Because the CCP engine works on a single source and destination
	 *   dma address at a time, each entry in the source scatterlist
	 *   (after the dma_map_sg call) must be less than or equal to the
	 *   (remaining) length in the destination scatterlist entry and the
	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
	 */
	dst.sg_wa.sg_used = 0;
	for (i = 1; i <= src.sg_wa.dma_count; i++) {
		if (!dst.sg_wa.sg ||
		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
			ret = -EINVAL;
			goto e_dst;
		}

		if (i == src.sg_wa.dma_count) {
			op.eom = 1;
			op.soc = 1;
		}

		op.src.type = CCP_MEMTYPE_SYSTEM;
		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
		op.src.u.dma.offset = 0;
		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);

		op.dst.type = CCP_MEMTYPE_SYSTEM;
		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
		op.dst.u.dma.offset = dst.sg_wa.sg_used;
		op.dst.u.dma.length = op.src.u.dma.length;

		ret = ccp_perform_passthru(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		dst.sg_wa.sg_used += src.sg_wa.sg->length;
		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
			dst.sg_wa.sg_used = 0;
		}
		src.sg_wa.sg = sg_next(src.sg_wa.sg);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_mask:
	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
		ccp_dm_free(&mask);

	return ret;
}

static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.mm.operand_1 ||
	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
		if (!ecc->u.mm.operand_2 ||
		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.mm.result ||
	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first operand */
	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
				ecc->u.mm.operand_1_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
		/* Copy the second operand */
		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
					ecc->u.mm.operand_2_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the ECC result */
	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}

static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.pm.point_1.x ||
	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.point_1.y ||
	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		if (!ecc->u.pm.point_2.x ||
		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
		    !ecc->u.pm.point_2.y ||
		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;
	} else {
		if (!ecc->u.pm.domain_a ||
		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
			if (!ecc->u.pm.scalar ||
			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
				return -EINVAL;
	}

	if (!ecc->u.pm.result.x ||
	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.result.y ||
	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first point X and Y coordinate */
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
				ecc->u.pm.point_1.x_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
				ecc->u.pm.point_1.y_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Set the first point Z coordinate to 1 */
	*src.address = 0x01;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Copy the second point X and Y coordinate */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
					ecc->u.pm.point_2.x_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
					ecc->u.pm.point_2.y_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;

		/* Set the second point Z coordinate to 1 */
		*src.address = 0x01;
		src.address += CCP_ECC_OPERAND_SIZE;
	} else {
		/* Copy the Domain "a" parameter */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
					ecc->u.pm.domain_a_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
			/* Copy the scalar value */
			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
						ecc->u.pm.scalar_len,
						CCP_ECC_OPERAND_SIZE, false);
			src.address += CCP_ECC_OPERAND_SIZE;
		}
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the workarea address since it is updated as we walk through
	 * to copy the point math result
	 */
	save = dst.address;

	/* Save the ECC result X and Y coordinates */
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;

	/* Restore the workarea address */
	dst.address = save;

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}

static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;

	ecc->ecc_result = 0;

	if (!ecc->mod ||
	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	switch (ecc->function) {
	case CCP_ECC_FUNCTION_MMUL_384BIT:
	case CCP_ECC_FUNCTION_MADD_384BIT:
	case CCP_ECC_FUNCTION_MINV_384BIT:
		return ccp_run_ecc_mm_cmd(cmd_q, cmd);

	case CCP_ECC_FUNCTION_PADD_384BIT:
	case CCP_ECC_FUNCTION_PMUL_384BIT:
	case CCP_ECC_FUNCTION_PDBL_384BIT:
		return ccp_run_ecc_pm_cmd(cmd_q, cmd);

	default:
		return -EINVAL;
	}
}

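/**
 * ccp_run_cmd - run a single command on a command queue
 * @cmd_q: the command queue to run the command on
 * @cmd: the command to run
 *
 * Resets the per-queue error and interrupt state, reads the current queue
 * depth and dispatches the command to the engine-specific handler.
 *
 * A minimal caller sketch for a single-shot SHA-256 of a buffered message
 * (assuming the queue and scatterlists are already set up; ctx_sg, src_sg
 * and src_len are illustrative names, not fields defined in this file):
 *
 *	struct ccp_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.engine = CCP_ENGINE_SHA;
 *	cmd.u.sha.type = CCP_SHA_TYPE_256;
 *	cmd.u.sha.ctx = ctx_sg;
 *	cmd.u.sha.ctx_len = CCP_SHA_CTXSIZE;
 *	cmd.u.sha.src = src_sg;
 *	cmd.u.sha.src_len = src_len;
 *	cmd.u.sha.first = 1;
 *	cmd.u.sha.final = 1;
 *	cmd.u.sha.msg_bits = (u64)src_len << 3;
 *	ret = ccp_run_cmd(cmd_q, &cmd);
 */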
int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	int ret;

	cmd->engine_error = 0;
	cmd_q->cmd_error = 0;
	cmd_q->int_rcvd = 0;
	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

	switch (cmd->engine) {
	case CCP_ENGINE_AES:
		ret = ccp_run_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_XTS_AES_128:
		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_SHA:
		ret = ccp_run_sha_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_RSA:
		ret = ccp_run_rsa_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_PASSTHRU:
		ret = ccp_run_passthru_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_ECC:
		ret = ccp_run_ecc_cmd(cmd_q, cmd);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}