1/*
2 * Copyright (C) 2012,2013 - ARM Ltd
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * Derived from arch/arm/kvm/coproc.c:
6 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
7 * Authors: Rusty Russell <rusty@rustcorp.com.au>
8 *          Christoffer Dall <c.dall@virtualopensystems.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License, version 2, as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23#include <linux/kvm_host.h>
24#include <linux/mm.h>
25#include <linux/uaccess.h>
26
27#include <asm/cacheflush.h>
28#include <asm/cputype.h>
29#include <asm/debug-monitors.h>
30#include <asm/esr.h>
31#include <asm/kvm_arm.h>
32#include <asm/kvm_coproc.h>
33#include <asm/kvm_emulate.h>
34#include <asm/kvm_host.h>
35#include <asm/kvm_mmu.h>
36
37#include <trace/events/kvm.h>
38
39#include "sys_regs.h"
40
41/*
 * All of this file is extremely similar to the ARM coproc.c, but the
43 * types are different. My gut feeling is that it should be pretty
44 * easy to merge, but that would be an ABI breakage -- again. VFP
45 * would also need to be abstracted.
46 *
47 * For AArch32, we only take care of what is being trapped. Anything
48 * that has to do with init and userspace access has to go via the
49 * 64bit interface.
50 */
51
/*
 * 3 bits per cache level, as per CLIDR, but non-existent caches always 0.
 * NOTE(review): nothing in this part of the file reads or writes this
 * variable; presumably it is populated and consumed further down -- confirm.
 */
static u32 cache_levels;

/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 12
57
58/* Which cache CCSIDR represents depends on CSSELR value. */
59static u32 get_ccsidr(u32 csselr)
60{
61	u32 ccsidr;
62
63	/* Make sure noone else changes CSSELR during this! */
64	local_irq_disable();
65	/* Put value into CSSELR */
66	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
67	isb();
68	/* Read result out of CCSIDR */
69	asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
70	local_irq_enable();
71
72	return ccsidr;
73}
74
75/*
76 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
77 */
78static bool access_dcsw(struct kvm_vcpu *vcpu,
79			const struct sys_reg_params *p,
80			const struct sys_reg_desc *r)
81{
82	if (!p->is_write)
83		return read_from_write_only(vcpu, p);
84
85	kvm_set_way_flush(vcpu);
86	return true;
87}
88
89/*
90 * Generic accessor for VM registers. Only called as long as HCR_TVM
91 * is set. If the guest enables the MMU, we stop trapping the VM
92 * sys_regs and leave it in complete control of the caches.
93 */
94static bool access_vm_reg(struct kvm_vcpu *vcpu,
95			  const struct sys_reg_params *p,
96			  const struct sys_reg_desc *r)
97{
98	unsigned long val;
99	bool was_enabled = vcpu_has_cache_enabled(vcpu);
100
101	BUG_ON(!p->is_write);
102
103	val = *vcpu_reg(vcpu, p->Rt);
104	if (!p->is_aarch32) {
105		vcpu_sys_reg(vcpu, r->reg) = val;
106	} else {
107		if (!p->is_32bit)
108			vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
109		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
110	}
111
112	kvm_toggle_cache(vcpu, was_enabled);
113	return true;
114}
115
116/*
117 * Trap handler for the GICv3 SGI generation system register.
118 * Forward the request to the VGIC emulation.
119 * The cp15_64 code makes sure this automatically works
120 * for both AArch64 and AArch32 accesses.
121 */
122static bool access_gic_sgi(struct kvm_vcpu *vcpu,
123			   const struct sys_reg_params *p,
124			   const struct sys_reg_desc *r)
125{
126	u64 val;
127
128	if (!p->is_write)
129		return read_from_write_only(vcpu, p);
130
131	val = *vcpu_reg(vcpu, p->Rt);
132	vgic_v3_dispatch_sgi(vcpu, val);
133
134	return true;
135}
136
137static bool trap_raz_wi(struct kvm_vcpu *vcpu,
138			const struct sys_reg_params *p,
139			const struct sys_reg_desc *r)
140{
141	if (p->is_write)
142		return ignore_write(vcpu, p);
143	else
144		return read_zero(vcpu, p);
145}
146
147static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
148			   const struct sys_reg_params *p,
149			   const struct sys_reg_desc *r)
150{
151	if (p->is_write) {
152		return ignore_write(vcpu, p);
153	} else {
154		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
155		return true;
156	}
157}
158
159static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
160				   const struct sys_reg_params *p,
161				   const struct sys_reg_desc *r)
162{
163	if (p->is_write) {
164		return ignore_write(vcpu, p);
165	} else {
166		u32 val;
167		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
168		*vcpu_reg(vcpu, p->Rt) = val;
169		return true;
170	}
171}
172
173/*
174 * We want to avoid world-switching all the DBG registers all the
175 * time:
176 *
177 * - If we've touched any debug register, it is likely that we're
178 *   going to touch more of them. It then makes sense to disable the
179 *   traps and start doing the save/restore dance
180 * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
181 *   then mandatory to save/restore the registers, as the guest
182 *   depends on them.
183 *
184 * For this, we use a DIRTY bit, indicating the guest has modified the
185 * debug registers, used as follow:
186 *
187 * On guest entry:
188 * - If the dirty bit is set (because we're coming back from trapping),
189 *   disable the traps, save host registers, restore guest registers.
190 * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
191 *   set the dirty bit, disable the traps, save host registers,
192 *   restore guest registers.
193 * - Otherwise, enable the traps
194 *
195 * On guest exit:
196 * - If the dirty bit is set, save guest registers, restore host
197 *   registers and clear the dirty bit. This ensure that the host can
198 *   now use the debug registers.
199 */
200static bool trap_debug_regs(struct kvm_vcpu *vcpu,
201			    const struct sys_reg_params *p,
202			    const struct sys_reg_desc *r)
203{
204	if (p->is_write) {
205		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
206		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
207	} else {
208		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
209	}
210
211	return true;
212}
213
214static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
215{
216	u64 amair;
217
218	asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
219	vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
220}
221
222static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
223{
224	u64 mpidr;
225
226	/*
227	 * Map the vcpu_id into the first three affinity level fields of
228	 * the MPIDR. We limit the number of VCPUs in level 0 due to a
229	 * limitation to 16 CPUs in that level in the ICC_SGIxR registers
230	 * of the GICv3 to be able to address each CPU directly when
231	 * sending IPIs.
232	 */
233	mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
234	mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
235	mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
236	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
237}
238
/*
 * Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go.
 *
 * Expands to four comma-separated descriptor initializers (without a
 * trailing comma) for index @n, in ascending Op2 order: DBGBVRn (Op2=4),
 * DBGBCRn (5), DBGWVRn (6) and DBGWCRn (7). All of them use
 * trap_debug_regs and reset_val, with the register index being the
 * matching *0_EL1 index plus @n and a final initializer of 0.
 */
#define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
	/* DBGBVRn_EL1 */						\
	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\
	/* DBGBCRn_EL1 */						\
	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\
	/* DBGWVRn_EL1 */						\
	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 },		\
	/* DBGWCRn_EL1 */						\
	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
253
/*
 * Architected system registers.
 * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
 *
 * Judging by the handlers used below, each entry lists the encoding
 * followed, positionally, by the trap accessor, the reset hook, the vcpu
 * register index and a final value (NOTE(review): presumably the value
 * applied by reset_val -- confirm against sys_regs.h). Entries with a
 * NULL accessor must never be reached through a trap: emulate_sys_reg()
 * does a BUG_ON(!r->access) on the entry it finds.
 *
 * We could trap ID_DFR0 and tell the guest we don't support performance
 * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
 * NAKed, so it will read the PMCR anyway.
 *
 * Therefore we tell the guest we have 0 counters.  Unfortunately, we
 * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
 * all PM registers, which doesn't crash the guest kernel at least.
 *
 * Debug handling: We do trap most, if not all debug related system
 * registers. The implementation is good enough to ensure that a guest
 * can use these with minimal performance degradation. The drawback is
 * that we don't implement any of the external debug, none of the
 * OSlock protocol. This should be revisited if we ever encounter a
 * more demanding guest...
 */
static const struct sys_reg_desc sys_reg_descs[] = {
	/* DC ISW */
	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
	  access_dcsw },
	/* DC CSW */
	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
	  access_dcsw },
	/* DC CISW */
	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
	  access_dcsw },

	/*
	 * The per-index debug register macros are interleaved with the
	 * other CRn=0 debug registers to keep the table in encoding order.
	 */
	DBG_BCR_BVR_WCR_WVR_EL1(0),
	DBG_BCR_BVR_WCR_WVR_EL1(1),
	/* MDCCINT_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
	/* MDSCR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
	DBG_BCR_BVR_WCR_WVR_EL1(2),
	DBG_BCR_BVR_WCR_WVR_EL1(3),
	DBG_BCR_BVR_WCR_WVR_EL1(4),
	DBG_BCR_BVR_WCR_WVR_EL1(5),
	DBG_BCR_BVR_WCR_WVR_EL1(6),
	DBG_BCR_BVR_WCR_WVR_EL1(7),
	DBG_BCR_BVR_WCR_WVR_EL1(8),
	DBG_BCR_BVR_WCR_WVR_EL1(9),
	DBG_BCR_BVR_WCR_WVR_EL1(10),
	DBG_BCR_BVR_WCR_WVR_EL1(11),
	DBG_BCR_BVR_WCR_WVR_EL1(12),
	DBG_BCR_BVR_WCR_WVR_EL1(13),
	DBG_BCR_BVR_WCR_WVR_EL1(14),
	DBG_BCR_BVR_WCR_WVR_EL1(15),

	/* MDRAR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
	  trap_raz_wi },
	/* OSLAR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
	  trap_raz_wi },
	/* OSLSR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
	  trap_oslsr_el1 },
	/* OSDLR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
	  trap_raz_wi },
	/* DBGPRCR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
	  trap_raz_wi },
	/* DBGCLAIMSET_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
	  trap_raz_wi },
	/* DBGCLAIMCLR_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
	  trap_raz_wi },
	/* DBGAUTHSTATUS_EL1 */
	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
	  trap_dbgauthstatus_el1 },

	/* TEECR32_EL1 */
	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
	  NULL, reset_val, TEECR32_EL1, 0 },
	/* TEEHBR32_EL1 */
	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
	  NULL, reset_val, TEEHBR32_EL1, 0 },

	/* MDCCSR_EL1 */
	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
	  trap_raz_wi },
	/* DBGDTR_EL0 */
	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
	  trap_raz_wi },
	/* DBGDTR[TR]X_EL0 */
	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
	  trap_raz_wi },

	/* DBGVCR32_EL2 */
	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
	  NULL, reset_val, DBGVCR32_EL2, 0 },

	/* MPIDR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
	  NULL, reset_mpidr, MPIDR_EL1 },
	/* SCTLR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
	  access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
	/* CPACR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
	  NULL, reset_val, CPACR_EL1, 0 },
	/* TTBR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
	  access_vm_reg, reset_unknown, TTBR0_EL1 },
	/* TTBR1_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
	  access_vm_reg, reset_unknown, TTBR1_EL1 },
	/* TCR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
	  access_vm_reg, reset_val, TCR_EL1, 0 },

	/* AFSR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
	  access_vm_reg, reset_unknown, AFSR0_EL1 },
	/* AFSR1_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
	  access_vm_reg, reset_unknown, AFSR1_EL1 },
	/* ESR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
	  access_vm_reg, reset_unknown, ESR_EL1 },
	/* FAR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
	  access_vm_reg, reset_unknown, FAR_EL1 },
	/* PAR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
	  NULL, reset_unknown, PAR_EL1 },

	/* PMINTENSET_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
	  trap_raz_wi },
	/* PMINTENCLR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
	  trap_raz_wi },

	/* MAIR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
	  access_vm_reg, reset_unknown, MAIR_EL1 },
	/* AMAIR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },

	/* VBAR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
	  NULL, reset_val, VBAR_EL1, 0 },

	/* ICC_SGI1R_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
	  access_gic_sgi },
	/* ICC_SRE_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
	  trap_raz_wi },

	/* CONTEXTIDR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
	/* TPIDR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
	  NULL, reset_unknown, TPIDR_EL1 },

	/* CNTKCTL_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
	  NULL, reset_val, CNTKCTL_EL1, 0},

	/* CSSELR_EL1 */
	{ Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
	  NULL, reset_unknown, CSSELR_EL1 },

	/* PMCR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
	  trap_raz_wi },
	/* PMCNTENSET_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
	  trap_raz_wi },
	/* PMCNTENCLR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
	  trap_raz_wi },
	/* PMOVSCLR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
	  trap_raz_wi },
	/* PMSWINC_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
	  trap_raz_wi },
	/* PMSELR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
	  trap_raz_wi },
	/* PMCEID0_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
	  trap_raz_wi },
	/* PMCEID1_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
	  trap_raz_wi },
	/* PMCCNTR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
	  trap_raz_wi },
	/* PMXEVTYPER_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
	  trap_raz_wi },
	/* PMXEVCNTR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
	  trap_raz_wi },
	/* PMUSERENR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
	  trap_raz_wi },
	/* PMOVSSET_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
	  trap_raz_wi },

	/* TPIDR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
	  NULL, reset_unknown, TPIDR_EL0 },
	/* TPIDRRO_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
	  NULL, reset_unknown, TPIDRRO_EL0 },

	/* DACR32_EL2 */
	{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
	  NULL, reset_unknown, DACR32_EL2 },
	/* IFSR32_EL2 */
	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
	  NULL, reset_unknown, IFSR32_EL2 },
	/* FPEXC32_EL2 */
	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
	  NULL, reset_val, FPEXC32_EL2, 0x70 },
};
485
486static bool trap_dbgidr(struct kvm_vcpu *vcpu,
487			const struct sys_reg_params *p,
488			const struct sys_reg_desc *r)
489{
490	if (p->is_write) {
491		return ignore_write(vcpu, p);
492	} else {
493		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
494		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
495		u32 el3 = !!((pfr >> 12) & 0xf);
496
497		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
498					  (((dfr >> 12) & 0xf) << 24) |
499					  (((dfr >> 28) & 0xf) << 20) |
500					  (6 << 16) | (el3 << 14) | (el3 << 12));
501		return true;
502	}
503}
504
505static bool trap_debug32(struct kvm_vcpu *vcpu,
506			 const struct sys_reg_params *p,
507			 const struct sys_reg_desc *r)
508{
509	if (p->is_write) {
510		vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
511		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
512	} else {
513		*vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
514	}
515
516	return true;
517}
518
/*
 * AArch32 counterpart of the DBG{BCR,BVR,WVR,WCR}n expansion: yields four
 * comma-separated cp14 descriptor initializers (no trailing comma) for
 * index @n, all handled by trap_debug32 with no reset hook.
 *
 * The register index advances by two per @n.
 * NOTE(review): presumably because each cp14 index addresses one 32bit
 * half of a 64bit shadow register -- confirm against the cp14_* definitions.
 */
#define DBG_BCR_BVR_WCR_WVR(n)					\
	/* DBGBVRn */						\
	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32,	\
	  NULL, (cp14_DBGBVR0 + (n) * 2) },			\
	/* DBGBCRn */						\
	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32,	\
	  NULL, (cp14_DBGBCR0 + (n) * 2) },			\
	/* DBGWVRn */						\
	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32,	\
	  NULL, (cp14_DBGWVR0 + (n) * 2) },			\
	/* DBGWCRn */						\
	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32,	\
	  NULL, (cp14_DBGWCR0 + (n) * 2) }
532
/*
 * Expand to the cp14 descriptor for DBGBXVRn (Op1=0, CRn=1, CRm=n, Op2=1),
 * handled by trap_debug32 with no reset hook. The register index advances
 * by two per @n, like in DBG_BCR_BVR_WCR_WVR().
 *
 * @n and the whole index expression are parenthesized so that an
 * expression argument (e.g. "i + 1") selects the intended register instead
 * of being split up by operator precedence.
 */
#define DBGBXVR(n)						\
	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32,	\
	  NULL, (cp14_DBGBXVR0 + (n) * 2) }
536
/*
 * Trapped cp14 registers. We generally ignore most of the external
 * debug, on the principle that they don't really make sense to a
 * guest. Revisit this one day, should this principle change.
 *
 * Entries appear in ascending Op1, CRn, CRm, Op2 order, which is why the
 * per-index macros are interleaved with the individual registers.
 */
static const struct sys_reg_desc cp14_regs[] = {
	/* DBGIDR */
	{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
	/* DBGDTRRXext */
	{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },

	DBG_BCR_BVR_WCR_WVR(0),
	/* DBGDSCRint */
	{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
	DBG_BCR_BVR_WCR_WVR(1),
	/*
	 * NOTE(review): DBGDCCINT, DBGDSCRext and DBGVCR below use
	 * trap_debug32 without naming a register index, so r->reg is 0 for
	 * all three when the handler runs -- confirm this is intended.
	 */
	/* DBGDCCINT */
	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
	/* DBGDSCRext */
	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
	DBG_BCR_BVR_WCR_WVR(2),
	/* DBGDTR[RT]Xint */
	{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
	/* DBGDTR[RT]Xext */
	{ Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
	DBG_BCR_BVR_WCR_WVR(3),
	DBG_BCR_BVR_WCR_WVR(4),
	DBG_BCR_BVR_WCR_WVR(5),
	/* DBGWFAR */
	{ Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
	/* DBGOSECCR */
	{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
	DBG_BCR_BVR_WCR_WVR(6),
	/* DBGVCR */
	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
	DBG_BCR_BVR_WCR_WVR(7),
	DBG_BCR_BVR_WCR_WVR(8),
	DBG_BCR_BVR_WCR_WVR(9),
	DBG_BCR_BVR_WCR_WVR(10),
	DBG_BCR_BVR_WCR_WVR(11),
	DBG_BCR_BVR_WCR_WVR(12),
	DBG_BCR_BVR_WCR_WVR(13),
	DBG_BCR_BVR_WCR_WVR(14),
	DBG_BCR_BVR_WCR_WVR(15),

	/* DBGDRAR (32bit) */
	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },

	DBGBXVR(0),
	/* DBGOSLAR */
	{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
	DBGBXVR(1),
	/* DBGOSLSR */
	{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
	DBGBXVR(2),
	DBGBXVR(3),
	/* DBGOSDLR */
	{ Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
	DBGBXVR(4),
	/* DBGPRCR */
	{ Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
	DBGBXVR(5),
	DBGBXVR(6),
	DBGBXVR(7),
	DBGBXVR(8),
	DBGBXVR(9),
	DBGBXVR(10),
	DBGBXVR(11),
	DBGBXVR(12),
	DBGBXVR(13),
	DBGBXVR(14),
	DBGBXVR(15),

	/* DBGDSAR (32bit) */
	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },

	/* DBGDEVID2 */
	{ Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
	/* DBGDEVID1 */
	{ Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
	/* DBGDEVID */
	{ Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
	/* DBGCLAIMSET */
	{ Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
	/* DBGCLAIMCLR */
	{ Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
	/* DBGAUTHSTATUS */
	{ Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
};
625
/*
 * Trapped cp14 64bit registers.
 *
 * Only Op1 and CRm are given for each entry; this matches
 * kvm_handle_cp_64(), which decodes just those two fields and forces
 * Op0, CRn and Op2 to 0. Both registers are read-as-zero/write-ignored.
 */
static const struct sys_reg_desc cp14_64_regs[] = {
	/* DBGDRAR (64bit) */
	{ Op1( 0), CRm( 1), .access = trap_raz_wi },

	/* DBGDSAR (64bit) */
	{ Op1( 0), CRm( 2), .access = trap_raz_wi },
};
634
/*
 * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
 * depending on the way they are accessed (as a 32bit or a 64bit
 * register).
 *
 * This table holds the 32bit (mrc/mcr) encodings and is the one
 * kvm_handle_cp15_32() searches; the 64bit encodings are in
 * cp15_64_regs.
 */
static const struct sys_reg_desc cp15_regs[] = {
	/*
	 * NOTE(review): routed to the SGI handler; an entry with the same
	 * Op1/CRm also exists in cp15_64_regs -- confirm that this 32bit
	 * encoding is intended.
	 */
	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },

	/* VM control registers, shadowed through access_vm_reg() */
	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
	{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
	{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
	{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
	{ Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
	{ Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
	{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
	{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },

	/*
	 * DC{C,I,CI}SW operations:
	 */
	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },

	/* PMU: everything is read-as-zero/write-ignored */
	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },

	/* Memory attribute registers, shadowed through access_vm_reg() */
	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },

	/* ICC_SRE */
	{ Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },

	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
};
687
/*
 * Trapped cp15 64bit (mrrc/mcrr) registers, searched by
 * kvm_handle_cp15_64(). Only Op1 and CRm are significant here, as
 * kvm_handle_cp_64() forces Op0, CRn and Op2 to 0 when decoding.
 */
static const struct sys_reg_desc cp15_64_regs[] = {
	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
};
693
/*
 * Target specific emulation tables, indexed by target id. Slots are
 * filled by kvm_register_target_sys_reg_table() and read back by
 * get_target_table().
 */
static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
696
/*
 * Record @table as the target specific emulation table for @target,
 * replacing whatever was registered for that target before.
 *
 * @target is used to index target_tables[] without any range check here.
 * NOTE(review): callers presumably guarantee target < KVM_ARM_NUM_TARGETS
 * -- confirm.
 */
void kvm_register_target_sys_reg_table(unsigned int target,
				       struct kvm_sys_reg_target_table *table)
{
	target_tables[target] = table;
}
702
703/* Get specific register table for this target. */
704static const struct sys_reg_desc *get_target_table(unsigned target,
705						   bool mode_is_64,
706						   size_t *num)
707{
708	struct kvm_sys_reg_target_table *table;
709
710	table = target_tables[target];
711	if (mode_is_64) {
712		*num = table->table64.num;
713		return table->table64.table;
714	} else {
715		*num = table->table32.num;
716		return table->table32.table;
717	}
718}
719
720static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
721					 const struct sys_reg_desc table[],
722					 unsigned int num)
723{
724	unsigned int i;
725
726	for (i = 0; i < num; i++) {
727		const struct sys_reg_desc *r = &table[i];
728
729		if (params->Op0 != r->Op0)
730			continue;
731		if (params->Op1 != r->Op1)
732			continue;
733		if (params->CRn != r->CRn)
734			continue;
735		if (params->CRm != r->CRm)
736			continue;
737		if (params->Op2 != r->Op2)
738			continue;
739
740		return r;
741	}
742	return NULL;
743}
744
/*
 * Handler for trapped cp14 load/store accesses: none of them is emulated,
 * so an undefined exception is injected into the guest.
 *
 * Always returns 1, like the other trap handlers in this file.
 * NOTE(review): 1 presumably tells the caller to resume the guest --
 * confirm against the exit handling code.
 */
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	kvm_inject_undefined(vcpu);
	return 1;
}
750
751/*
752 * emulate_cp --  tries to match a sys_reg access in a handling table, and
753 *                call the corresponding trap handler.
754 *
755 * @params: pointer to the descriptor of the access
756 * @table: array of trap descriptors
757 * @num: size of the trap descriptor array
758 *
759 * Return 0 if the access has been handled, and -1 if not.
760 */
761static int emulate_cp(struct kvm_vcpu *vcpu,
762		      const struct sys_reg_params *params,
763		      const struct sys_reg_desc *table,
764		      size_t num)
765{
766	const struct sys_reg_desc *r;
767
768	if (!table)
769		return -1;	/* Not handled */
770
771	r = find_reg(params, table, num);
772
773	if (r) {
774		/*
775		 * Not having an accessor means that we have
776		 * configured a trap that we don't know how to
777		 * handle. This certainly qualifies as a gross bug
778		 * that should be fixed right away.
779		 */
780		BUG_ON(!r->access);
781
782		if (likely(r->access(vcpu, params, r))) {
783			/* Skip instruction, since it was emulated */
784			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
785		}
786
787		/* Handled */
788		return 0;
789	}
790
791	/* Not handled */
792	return -1;
793}
794
795static void unhandled_cp_access(struct kvm_vcpu *vcpu,
796				struct sys_reg_params *params)
797{
798	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
799	int cp;
800
801	switch(hsr_ec) {
802	case ESR_ELx_EC_CP15_32:
803	case ESR_ELx_EC_CP15_64:
804		cp = 15;
805		break;
806	case ESR_ELx_EC_CP14_MR:
807	case ESR_ELx_EC_CP14_64:
808		cp = 14;
809		break;
810	default:
811		WARN_ON((cp = -1));
812	}
813
814	kvm_err("Unsupported guest CP%d access at: %08lx\n",
815		cp, *vcpu_pc(vcpu));
816	print_sys_reg_instr(params);
817	kvm_inject_undefined(vcpu);
818}
819
820/**
821 * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
822 * @vcpu: The VCPU pointer
823 * @run:  The kvm_run struct
824 */
825static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
826			    const struct sys_reg_desc *global,
827			    size_t nr_global,
828			    const struct sys_reg_desc *target_specific,
829			    size_t nr_specific)
830{
831	struct sys_reg_params params;
832	u32 hsr = kvm_vcpu_get_hsr(vcpu);
833	int Rt2 = (hsr >> 10) & 0xf;
834
835	params.is_aarch32 = true;
836	params.is_32bit = false;
837	params.CRm = (hsr >> 1) & 0xf;
838	params.Rt = (hsr >> 5) & 0xf;
839	params.is_write = ((hsr & 1) == 0);
840
841	params.Op0 = 0;
842	params.Op1 = (hsr >> 16) & 0xf;
843	params.Op2 = 0;
844	params.CRn = 0;
845
846	/*
847	 * Massive hack here. Store Rt2 in the top 32bits so we only
848	 * have one register to deal with. As we use the same trap
849	 * backends between AArch32 and AArch64, we get away with it.
850	 */
851	if (params.is_write) {
852		u64 val = *vcpu_reg(vcpu, params.Rt);
853		val &= 0xffffffff;
854		val |= *vcpu_reg(vcpu, Rt2) << 32;
855		*vcpu_reg(vcpu, params.Rt) = val;
856	}
857
858	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
859		goto out;
860	if (!emulate_cp(vcpu, &params, global, nr_global))
861		goto out;
862
863	unhandled_cp_access(vcpu, &params);
864
865out:
866	/* Do the opposite hack for the read side */
867	if (!params.is_write) {
868		u64 val = *vcpu_reg(vcpu, params.Rt);
869		val >>= 32;
870		*vcpu_reg(vcpu, Rt2) = val;
871	}
872
873	return 1;
874}
875
876/**
877 * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
878 * @vcpu: The VCPU pointer
879 * @run:  The kvm_run struct
880 */
881static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
882			    const struct sys_reg_desc *global,
883			    size_t nr_global,
884			    const struct sys_reg_desc *target_specific,
885			    size_t nr_specific)
886{
887	struct sys_reg_params params;
888	u32 hsr = kvm_vcpu_get_hsr(vcpu);
889
890	params.is_aarch32 = true;
891	params.is_32bit = true;
892	params.CRm = (hsr >> 1) & 0xf;
893	params.Rt  = (hsr >> 5) & 0xf;
894	params.is_write = ((hsr & 1) == 0);
895	params.CRn = (hsr >> 10) & 0xf;
896	params.Op0 = 0;
897	params.Op1 = (hsr >> 14) & 0x7;
898	params.Op2 = (hsr >> 17) & 0x7;
899
900	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
901		return 1;
902	if (!emulate_cp(vcpu, &params, global, nr_global))
903		return 1;
904
905	unhandled_cp_access(vcpu, &params);
906	return 1;
907}
908
909int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
910{
911	const struct sys_reg_desc *target_specific;
912	size_t num;
913
914	target_specific = get_target_table(vcpu->arch.target, false, &num);
915	return kvm_handle_cp_64(vcpu,
916				cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
917				target_specific, num);
918}
919
920int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
921{
922	const struct sys_reg_desc *target_specific;
923	size_t num;
924
925	target_specific = get_target_table(vcpu->arch.target, false, &num);
926	return kvm_handle_cp_32(vcpu,
927				cp15_regs, ARRAY_SIZE(cp15_regs),
928				target_specific, num);
929}
930
/*
 * Handle a trapped 64bit (mrrc/mcrr) cp14 access using the generic
 * cp14_64_regs table only; there is no target specific table for cp14.
 */
int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	return kvm_handle_cp_64(vcpu,
				cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
				NULL, 0);
}
937
/*
 * Handle a trapped 32bit (mrc/mcr) cp14 access using the generic
 * cp14_regs table only; there is no target specific table for cp14.
 */
int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	return kvm_handle_cp_32(vcpu,
				cp14_regs, ARRAY_SIZE(cp14_regs),
				NULL, 0);
}
944
945static int emulate_sys_reg(struct kvm_vcpu *vcpu,
946			   const struct sys_reg_params *params)
947{
948	size_t num;
949	const struct sys_reg_desc *table, *r;
950
951	table = get_target_table(vcpu->arch.target, true, &num);
952
953	/* Search target-specific then generic table. */
954	r = find_reg(params, table, num);
955	if (!r)
956		r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
957
958	if (likely(r)) {
959		/*
960		 * Not having an accessor means that we have
961		 * configured a trap that we don't know how to
962		 * handle. This certainly qualifies as a gross bug
963		 * that should be fixed right away.
964		 */
965		BUG_ON(!r->access);
966
967		if (likely(r->access(vcpu, params, r))) {
968			/* Skip instruction, since it was emulated */
969			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
970			return 1;
971		}
972		/* If access function fails, it should complain. */
973	} else {
974		kvm_err("Unsupported guest sys_reg access at: %lx\n",
975			*vcpu_pc(vcpu));
976		print_sys_reg_instr(params);
977	}
978	kvm_inject_undefined(vcpu);
979	return 1;
980}
981
982static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
983			      const struct sys_reg_desc *table, size_t num)
984{
985	unsigned long i;
986
987	for (i = 0; i < num; i++)
988		if (table[i].reset)
989			table[i].reset(vcpu, &table[i]);
990}
991
992/**
993 * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
994 * @vcpu: The VCPU pointer
995 * @run:  The kvm_run struct
996 */
997int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
998{
999	struct sys_reg_params params;
1000	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
1001
1002	params.is_aarch32 = false;
1003	params.is_32bit = false;
1004	params.Op0 = (esr >> 20) & 3;
1005	params.Op1 = (esr >> 14) & 0x7;
1006	params.CRn = (esr >> 10) & 0xf;
1007	params.CRm = (esr >> 1) & 0xf;
1008	params.Op2 = (esr >> 17) & 0x7;
1009	params.Rt = (esr >> 5) & 0x1f;
1010	params.is_write = !(esr & 1);
1011
1012	return emulate_sys_reg(vcpu, &params);
1013}
1014
1015/******************************************************************************
1016 * Userspace API
1017 *****************************************************************************/
1018
1019static bool index_to_params(u64 id, struct sys_reg_params *params)
1020{
1021	switch (id & KVM_REG_SIZE_MASK) {
1022	case KVM_REG_SIZE_U64:
1023		/* Any unused index bits means it's not valid. */
1024		if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
1025			      | KVM_REG_ARM_COPROC_MASK
1026			      | KVM_REG_ARM64_SYSREG_OP0_MASK
1027			      | KVM_REG_ARM64_SYSREG_OP1_MASK
1028			      | KVM_REG_ARM64_SYSREG_CRN_MASK
1029			      | KVM_REG_ARM64_SYSREG_CRM_MASK
1030			      | KVM_REG_ARM64_SYSREG_OP2_MASK))
1031			return false;
1032		params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
1033			       >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
1034		params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
1035			       >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
1036		params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
1037			       >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
1038		params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
1039			       >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
1040		params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
1041			       >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
1042		return true;
1043	default:
1044		return false;
1045	}
1046}
1047
1048/* Decode an index value, and find the sys_reg_desc entry. */
1049static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
1050						    u64 id)
1051{
1052	size_t num;
1053	const struct sys_reg_desc *table, *r;
1054	struct sys_reg_params params;
1055
1056	/* We only do sys_reg for now. */
1057	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
1058		return NULL;
1059
1060	if (!index_to_params(id, &params))
1061		return NULL;
1062
1063	table = get_target_table(vcpu->arch.target, true, &num);
1064	r = find_reg(&params, table, num);
1065	if (!r)
1066		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
1067
1068	/* Not saved in the sys_reg array? */
1069	if (r && !r->reg)
1070		r = NULL;
1071
1072	return r;
1073}
1074
1075/*
1076 * These are the invariant sys_reg registers: we let the guest see the
1077 * host versions of these, so they're part of the guest state.
1078 *
1079 * A future CPU may provide a mechanism to present different values to
1080 * the guest, or a future kvm may trap them.
1081 */
1082
/*
 * FUNCTION_INVARIANT(reg) defines a static helper get_<reg>() that reads
 * the host's <reg> system register with "mrs" and stores the value in
 * r->val. The vcpu argument is not used. The descriptor is received as
 * const and the const is cast away for the store, so callers must pass a
 * descriptor that is actually writable.
 */
#define FUNCTION_INVARIANT(reg)						\
	static void get_##reg(struct kvm_vcpu *v,			\
			      const struct sys_reg_desc *r)		\
	{								\
		u64 val;						\
									\
		asm volatile("mrs %0, " __stringify(reg) "\n"		\
			     : "=r" (val));				\
		((struct sys_reg_desc *)r)->val = val;			\
	}

/* One get_<reg>() reader per invariant register. */
FUNCTION_INVARIANT(midr_el1)
FUNCTION_INVARIANT(ctr_el0)
FUNCTION_INVARIANT(revidr_el1)
FUNCTION_INVARIANT(id_pfr0_el1)
FUNCTION_INVARIANT(id_pfr1_el1)
FUNCTION_INVARIANT(id_dfr0_el1)
FUNCTION_INVARIANT(id_afr0_el1)
FUNCTION_INVARIANT(id_mmfr0_el1)
FUNCTION_INVARIANT(id_mmfr1_el1)
FUNCTION_INVARIANT(id_mmfr2_el1)
FUNCTION_INVARIANT(id_mmfr3_el1)
FUNCTION_INVARIANT(id_isar0_el1)
FUNCTION_INVARIANT(id_isar1_el1)
FUNCTION_INVARIANT(id_isar2_el1)
FUNCTION_INVARIANT(id_isar3_el1)
FUNCTION_INVARIANT(id_isar4_el1)
FUNCTION_INVARIANT(id_isar5_el1)
FUNCTION_INVARIANT(clidr_el1)
FUNCTION_INVARIANT(aidr_el1)
1113
/*
 * Table of invariant registers, one entry per register.
 *
 * ->val is filled in by kvm_sys_reg_table_init(), which calls each
 * entry's ->reset hook; the get_<reg>() reader listed in each entry is
 * presumably that hook and the preceding NULL presumably ->access
 * (positional initializers -- confirm against struct sys_reg_desc).
 *
 * kvm_sys_reg_table_init() also runs check_sysreg_table() on this table,
 * so entries must stay unique and in ascending encoding order.
 */
static struct sys_reg_desc invariant_sys_regs[] = {
	/* MIDR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
	  NULL, get_midr_el1 },
	/* REVIDR_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
	  NULL, get_revidr_el1 },
	/* ID_PFR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
	  NULL, get_id_pfr0_el1 },
	/* ID_PFR1_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
	  NULL, get_id_pfr1_el1 },
	/* ID_DFR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
	  NULL, get_id_dfr0_el1 },
	/* ID_AFR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
	  NULL, get_id_afr0_el1 },
	/* ID_MMFR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
	  NULL, get_id_mmfr0_el1 },
	/* ID_MMFR1_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
	  NULL, get_id_mmfr1_el1 },
	/* ID_MMFR2_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
	  NULL, get_id_mmfr2_el1 },
	/* ID_MMFR3_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
	  NULL, get_id_mmfr3_el1 },
	/* ID_ISAR0_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
	  NULL, get_id_isar0_el1 },
	/* ID_ISAR1_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
	  NULL, get_id_isar1_el1 },
	/* ID_ISAR2_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
	  NULL, get_id_isar2_el1 },
	/* ID_ISAR3_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
	  NULL, get_id_isar3_el1 },
	/* ID_ISAR4_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
	  NULL, get_id_isar4_el1 },
	/* ID_ISAR5_EL1 */
	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
	  NULL, get_id_isar5_el1 },
	/* CLIDR_EL1 */
	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
	  NULL, get_clidr_el1 },
	/* AIDR_EL1 */
	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
	  NULL, get_aidr_el1 },
	/* CTR_EL0 */
	{ Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
	  NULL, get_ctr_el0 },
};
1155
1156static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
1157{
1158	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
1159		return -EFAULT;
1160	return 0;
1161}
1162
1163static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
1164{
1165	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
1166		return -EFAULT;
1167	return 0;
1168}
1169
1170static int get_invariant_sys_reg(u64 id, void __user *uaddr)
1171{
1172	struct sys_reg_params params;
1173	const struct sys_reg_desc *r;
1174
1175	if (!index_to_params(id, &params))
1176		return -ENOENT;
1177
1178	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
1179	if (!r)
1180		return -ENOENT;
1181
1182	return reg_to_user(uaddr, &r->val, id);
1183}
1184
1185static int set_invariant_sys_reg(u64 id, void __user *uaddr)
1186{
1187	struct sys_reg_params params;
1188	const struct sys_reg_desc *r;
1189	int err;
1190	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
1191
1192	if (!index_to_params(id, &params))
1193		return -ENOENT;
1194	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
1195	if (!r)
1196		return -ENOENT;
1197
1198	err = reg_from_user(&val, uaddr, id);
1199	if (err)
1200		return err;
1201
1202	/* This is what we mean by invariant: you can't change it. */
1203	if (r->val != val)
1204		return -EINVAL;
1205
1206	return 0;
1207}
1208
1209static bool is_valid_cache(u32 val)
1210{
1211	u32 level, ctype;
1212
1213	if (val >= CSSELR_MAX)
1214		return false;
1215
1216	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
1217	level = (val >> 1);
1218	ctype = (cache_levels >> (level * 3)) & 7;
1219
1220	switch (ctype) {
1221	case 0: /* No cache */
1222		return false;
1223	case 1: /* Instruction cache only */
1224		return (val & 1);
1225	case 2: /* Data cache only */
1226	case 4: /* Unified cache */
1227		return !(val & 1);
1228	case 3: /* Separate instruction and data caches */
1229		return true;
1230	default: /* Reserved: we can't know instruction or data. */
1231		return false;
1232	}
1233}
1234
1235static int demux_c15_get(u64 id, void __user *uaddr)
1236{
1237	u32 val;
1238	u32 __user *uval = uaddr;
1239
1240	/* Fail if we have unknown bits set. */
1241	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
1242		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
1243		return -ENOENT;
1244
1245	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
1246	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
1247		if (KVM_REG_SIZE(id) != 4)
1248			return -ENOENT;
1249		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
1250			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
1251		if (!is_valid_cache(val))
1252			return -ENOENT;
1253
1254		return put_user(get_ccsidr(val), uval);
1255	default:
1256		return -ENOENT;
1257	}
1258}
1259
1260static int demux_c15_set(u64 id, void __user *uaddr)
1261{
1262	u32 val, newval;
1263	u32 __user *uval = uaddr;
1264
1265	/* Fail if we have unknown bits set. */
1266	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
1267		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
1268		return -ENOENT;
1269
1270	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
1271	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
1272		if (KVM_REG_SIZE(id) != 4)
1273			return -ENOENT;
1274		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
1275			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
1276		if (!is_valid_cache(val))
1277			return -ENOENT;
1278
1279		if (get_user(newval, uval))
1280			return -EFAULT;
1281
1282		/* This is also invariant: you can't change it. */
1283		if (newval != get_ccsidr(val))
1284			return -EINVAL;
1285		return 0;
1286	default:
1287		return -ENOENT;
1288	}
1289}
1290
1291int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
1292{
1293	const struct sys_reg_desc *r;
1294	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
1295
1296	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
1297		return demux_c15_get(reg->id, uaddr);
1298
1299	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
1300		return -ENOENT;
1301
1302	r = index_to_sys_reg_desc(vcpu, reg->id);
1303	if (!r)
1304		return get_invariant_sys_reg(reg->id, uaddr);
1305
1306	return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
1307}
1308
1309int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
1310{
1311	const struct sys_reg_desc *r;
1312	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
1313
1314	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
1315		return demux_c15_set(reg->id, uaddr);
1316
1317	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
1318		return -ENOENT;
1319
1320	r = index_to_sys_reg_desc(vcpu, reg->id);
1321	if (!r)
1322		return set_invariant_sys_reg(reg->id, uaddr);
1323
1324	return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
1325}
1326
1327static unsigned int num_demux_regs(void)
1328{
1329	unsigned int i, count = 0;
1330
1331	for (i = 0; i < CSSELR_MAX; i++)
1332		if (is_valid_cache(i))
1333			count++;
1334
1335	return count;
1336}
1337
1338static int write_demux_regids(u64 __user *uindices)
1339{
1340	u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
1341	unsigned int i;
1342
1343	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
1344	for (i = 0; i < CSSELR_MAX; i++) {
1345		if (!is_valid_cache(i))
1346			continue;
1347		if (put_user(val | i, uindices))
1348			return -EFAULT;
1349		uindices++;
1350	}
1351	return 0;
1352}
1353
1354static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
1355{
1356	return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
1357		KVM_REG_ARM64_SYSREG |
1358		(reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
1359		(reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
1360		(reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
1361		(reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
1362		(reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
1363}
1364
1365static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
1366{
1367	if (!*uind)
1368		return true;
1369
1370	if (put_user(sys_reg_to_index(reg), *uind))
1371		return false;
1372
1373	(*uind)++;
1374	return true;
1375}
1376
1377/* Assumed ordered tables, see kvm_sys_reg_table_init. */
1378static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
1379{
1380	const struct sys_reg_desc *i1, *i2, *end1, *end2;
1381	unsigned int total = 0;
1382	size_t num;
1383
1384	/* We check for duplicates here, to allow arch-specific overrides. */
1385	i1 = get_target_table(vcpu->arch.target, true, &num);
1386	end1 = i1 + num;
1387	i2 = sys_reg_descs;
1388	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
1389
1390	BUG_ON(i1 == end1 || i2 == end2);
1391
1392	/* Walk carefully, as both tables may refer to the same register. */
1393	while (i1 || i2) {
1394		int cmp = cmp_sys_reg(i1, i2);
1395		/* target-specific overrides generic entry. */
1396		if (cmp <= 0) {
1397			/* Ignore registers we trap but don't save. */
1398			if (i1->reg) {
1399				if (!copy_reg_to_user(i1, &uind))
1400					return -EFAULT;
1401				total++;
1402			}
1403		} else {
1404			/* Ignore registers we trap but don't save. */
1405			if (i2->reg) {
1406				if (!copy_reg_to_user(i2, &uind))
1407					return -EFAULT;
1408				total++;
1409			}
1410		}
1411
1412		if (cmp <= 0 && ++i1 == end1)
1413			i1 = NULL;
1414		if (cmp >= 0 && ++i2 == end2)
1415			i2 = NULL;
1416	}
1417	return total;
1418}
1419
1420unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
1421{
1422	return ARRAY_SIZE(invariant_sys_regs)
1423		+ num_demux_regs()
1424		+ walk_sys_regs(vcpu, (u64 __user *)NULL);
1425}
1426
1427int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
1428{
1429	unsigned int i;
1430	int err;
1431
1432	/* Then give them all the invariant registers' indices. */
1433	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
1434		if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
1435			return -EFAULT;
1436		uindices++;
1437	}
1438
1439	err = walk_sys_regs(vcpu, uindices);
1440	if (err < 0)
1441		return err;
1442	uindices += err;
1443
1444	return write_demux_regids(uindices);
1445}
1446
1447static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
1448{
1449	unsigned int i;
1450
1451	for (i = 1; i < n; i++) {
1452		if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
1453			kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
1454			return 1;
1455		}
1456	}
1457
1458	return 0;
1459}
1460
1461void kvm_sys_reg_table_init(void)
1462{
1463	unsigned int i;
1464	struct sys_reg_desc clidr;
1465
1466	/* Make sure tables are unique and in order. */
1467	BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
1468	BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
1469	BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
1470	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
1471	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
1472	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
1473
1474	/* We abuse the reset function to overwrite the table itself. */
1475	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
1476		invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
1477
1478	/*
1479	 * CLIDR format is awkward, so clean it up.  See ARM B4.1.20:
1480	 *
1481	 *   If software reads the Cache Type fields from Ctype1
1482	 *   upwards, once it has seen a value of 0b000, no caches
1483	 *   exist at further-out levels of the hierarchy. So, for
1484	 *   example, if Ctype3 is the first Cache Type field with a
1485	 *   value of 0b000, the values of Ctype4 to Ctype7 must be
1486	 *   ignored.
1487	 */
1488	get_clidr_el1(NULL, &clidr); /* Ugly... */
1489	cache_levels = clidr.val;
1490	for (i = 0; i < 7; i++)
1491		if (((cache_levels >> (i*3)) & 7) == 0)
1492			break;
1493	/* Clear all higher bits. */
1494	cache_levels &= (1 << (i*3))-1;
1495}
1496
1497/**
1498 * kvm_reset_sys_regs - sets system registers to reset value
1499 * @vcpu: The VCPU pointer
1500 *
1501 * This function finds the right table above and sets the registers on the
1502 * virtual CPU struct to their architecturally defined reset values.
1503 */
1504void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
1505{
1506	size_t num;
1507	const struct sys_reg_desc *table;
1508
1509	/* Catch someone adding a register without putting in reset entry. */
1510	memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
1511
1512	/* Generic chip reset first (so target could override). */
1513	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
1514
1515	table = get_target_table(vcpu->arch.target, true, &num);
1516	reset_sys_reg_descs(vcpu, table, num);
1517
1518	for (num = 1; num < NR_SYS_REGS; num++)
1519		if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
1520			panic("Didn't reset vcpu_sys_reg(%zi)", num);
1521}
1522