1 /*
2  * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
3  *
4  * Copyright (C) 2015 Martin Willi
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <crypto/algapi.h>
13 #include <crypto/chacha20.h>
14 #include <linux/crypto.h>
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 #include <asm/fpu/api.h>
18 #include <asm/simd.h>
19 
/*
 * Byte alignment that chacha20_simd() enforces on the state array before
 * handing it to the SIMD routines below.
 * NOTE(review): presumably required because the assembly uses aligned
 * 128-bit loads/stores on the state - confirm against the .S sources.
 */
#define CHACHA20_STATE_ALIGN 16

/*
 * XOR routines implemented outside this file (asmlinkage, no body here).
 * As used by chacha20_dosimd(), each call consumes 1, 4 or 8 blocks of
 * CHACHA20_BLOCK_SIZE bytes from src and writes the same amount to dst;
 * the caller advances the counter word state[12] afterwards.
 */
asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
#ifdef CONFIG_AS_AVX2
asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
/* Set once at module init; true when the 8-block AVX2 routine may be used. */
static bool chacha20_use_avx2;
#endif
28 
/*
 * chacha20_dosimd - XOR @bytes bytes of @src into @dst using the SIMD routines.
 * @state: ChaCha20 state; word 12 is advanced by one per full block processed
 * @dst:   output buffer, at least @bytes long
 * @src:   input buffer, at least @bytes long
 * @bytes: number of bytes to process; need not be a block multiple
 *
 * The widest routine available is used first (8 blocks with AVX2, then
 * 4 blocks, then single blocks).  A trailing partial block is bounced
 * through a block-sized stack buffer so that the single-block routine never
 * touches memory beyond @bytes in @src or @dst.  state[12] is not advanced
 * for that trailing partial block.
 *
 * Must be called with the FPU usable (the caller wraps it in
 * kernel_fpu_begin()/kernel_fpu_end()).
 */
static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	const unsigned int one = CHACHA20_BLOCK_SIZE;
	const unsigned int quad = CHACHA20_BLOCK_SIZE * 4;
#ifdef CONFIG_AS_AVX2
	const unsigned int octet = CHACHA20_BLOCK_SIZE * 8;

	if (chacha20_use_avx2) {
		/* Eight blocks per call while enough input remains. */
		for (; bytes >= octet; bytes -= octet) {
			chacha20_8block_xor_avx2(state, dst, src);
			state[12] += 8;
			src += octet;
			dst += octet;
		}
	}
#endif
	/* Four blocks per call. */
	for (; bytes >= quad; bytes -= quad) {
		chacha20_4block_xor_ssse3(state, dst, src);
		state[12] += 4;
		src += quad;
		dst += quad;
	}

	/* Remaining whole blocks, one at a time. */
	for (; bytes >= one; bytes -= one) {
		chacha20_block_xor_ssse3(state, dst, src);
		state[12]++;
		src += one;
		dst += one;
	}

	/* Leftover bytes shorter than a block go through a bounce buffer. */
	if (bytes) {
		u8 tail[CHACHA20_BLOCK_SIZE];

		memcpy(tail, src, bytes);
		chacha20_block_xor_ssse3(state, tail, tail);
		memcpy(dst, tail, bytes);
	}
}
65 
chacha20_simd(struct blkcipher_desc * desc,struct scatterlist * dst,struct scatterlist * src,unsigned int nbytes)66 static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
67 			 struct scatterlist *src, unsigned int nbytes)
68 {
69 	u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
70 	struct blkcipher_walk walk;
71 	int err;
72 
73 	if (!may_use_simd())
74 		return crypto_chacha20_crypt(desc, dst, src, nbytes);
75 
76 	state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
77 
78 	blkcipher_walk_init(&walk, dst, src, nbytes);
79 	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
80 
81 	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
82 
83 	kernel_fpu_begin();
84 
85 	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
86 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
87 				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
88 		err = blkcipher_walk_done(desc, &walk,
89 					  walk.nbytes % CHACHA20_BLOCK_SIZE);
90 	}
91 
92 	if (walk.nbytes) {
93 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
94 				walk.nbytes);
95 		err = blkcipher_walk_done(desc, &walk, 0);
96 	}
97 
98 	kernel_fpu_end();
99 
100 	return err;
101 }
102 
103 static struct crypto_alg alg = {
104 	.cra_name		= "chacha20",
105 	.cra_driver_name	= "chacha20-simd",
106 	.cra_priority		= 300,
107 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
108 	.cra_blocksize		= 1,
109 	.cra_type		= &crypto_blkcipher_type,
110 	.cra_ctxsize		= sizeof(struct chacha20_ctx),
111 	.cra_alignmask		= sizeof(u32) - 1,
112 	.cra_module		= THIS_MODULE,
113 	.cra_u			= {
114 		.blkcipher = {
115 			.min_keysize	= CHACHA20_KEY_SIZE,
116 			.max_keysize	= CHACHA20_KEY_SIZE,
117 			.ivsize		= CHACHA20_IV_SIZE,
118 			.geniv		= "seqiv",
119 			.setkey		= crypto_chacha20_setkey,
120 			.encrypt	= chacha20_simd,
121 			.decrypt	= chacha20_simd,
122 		},
123 	},
124 };
125 
chacha20_simd_mod_init(void)126 static int __init chacha20_simd_mod_init(void)
127 {
128 	if (!cpu_has_ssse3)
129 		return -ENODEV;
130 
131 #ifdef CONFIG_AS_AVX2
132 	chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
133 			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
134 #endif
135 	return crypto_register_alg(&alg);
136 }
137 
chacha20_simd_mod_fini(void)138 static void __exit chacha20_simd_mod_fini(void)
139 {
140 	crypto_unregister_alg(&alg);
141 }
142 
/* Hook the init/exit handlers into module load and unload. */
module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
/* Crypto aliases for both the generic algorithm name and the driver name. */
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
151