1/*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 *    Author(s): Carsten Otte <cotte@de.ibm.com>
11 *               Christian Borntraeger <borntraeger@de.ibm.com>
12 *               Heiko Carstens <heiko.carstens@de.ibm.com>
13 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14 *               Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17#include <linux/compiler.h>
18#include <linux/err.h>
19#include <linux/fs.h>
20#include <linux/hrtimer.h>
21#include <linux/init.h>
22#include <linux/kvm.h>
23#include <linux/kvm_host.h>
24#include <linux/module.h>
25#include <linux/random.h>
26#include <linux/slab.h>
27#include <linux/timer.h>
28#include <linux/vmalloc.h>
29#include <asm/asm-offsets.h>
30#include <asm/lowcore.h>
31#include <asm/pgtable.h>
32#include <asm/nmi.h>
33#include <asm/switch_to.h>
34#include <asm/isc.h>
35#include <asm/sclp.h>
36#include "kvm-s390.h"
37#include "gaccess.h"
38
39#define CREATE_TRACE_POINTS
40#include "trace.h"
41#include "trace-s390.h"
42
43#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
44#define LOCAL_IRQS 32
45#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
46			   (KVM_MAX_VCPUS + LOCAL_IRQS))
47
48#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
49
50struct kvm_stats_debugfs_item debugfs_entries[] = {
51	{ "userspace_handled", VCPU_STAT(exit_userspace) },
52	{ "exit_null", VCPU_STAT(exit_null) },
53	{ "exit_validity", VCPU_STAT(exit_validity) },
54	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
55	{ "exit_external_request", VCPU_STAT(exit_external_request) },
56	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
57	{ "exit_instruction", VCPU_STAT(exit_instruction) },
58	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
59	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
60	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
61	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
62	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
63	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
64	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
65	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
66	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
67	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
68	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
69	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
70	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
71	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
72	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
73	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
74	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
75	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
76	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
77	{ "instruction_spx", VCPU_STAT(instruction_spx) },
78	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
79	{ "instruction_stap", VCPU_STAT(instruction_stap) },
80	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
81	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
82	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
83	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
84	{ "instruction_essa", VCPU_STAT(instruction_essa) },
85	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
86	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
87	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
88	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
89	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
90	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
91	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
92	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
93	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
94	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
95	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
96	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
97	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
98	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
99	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
100	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
101	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
102	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
103	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
104	{ "diagnose_10", VCPU_STAT(diagnose_10) },
105	{ "diagnose_44", VCPU_STAT(diagnose_44) },
106	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
107	{ NULL }
108};
109
/* upper limit of the facilities that kvm may expose to the guest */
111unsigned long kvm_s390_fac_list_mask[] = {
112	0xffe6fffbfcfdfc40UL,
113	0x005c800000000000UL,
114};
115
116unsigned long kvm_s390_fac_list_mask_size(void)
117{
118	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
119	return ARRAY_SIZE(kvm_s390_fac_list_mask);
120}
121
122static struct gmap_notifier gmap_notifier;
123
124/* Section: not file related */
125int kvm_arch_hardware_enable(void)
126{
127	/* every s390 is virtualization enabled ;-) */
128	return 0;
129}
130
131static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
132
133int kvm_arch_hardware_setup(void)
134{
135	gmap_notifier.notifier_call = kvm_gmap_notifier;
136	gmap_register_ipte_notifier(&gmap_notifier);
137	return 0;
138}
139
140void kvm_arch_hardware_unsetup(void)
141{
142	gmap_unregister_ipte_notifier(&gmap_notifier);
143}
144
145int kvm_arch_init(void *opaque)
146{
147	/* Register floating interrupt controller interface. */
148	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
149}
150
151/* Section: device related */
152long kvm_arch_dev_ioctl(struct file *filp,
153			unsigned int ioctl, unsigned long arg)
154{
155	if (ioctl == KVM_S390_ENABLE_SIE)
156		return s390_enable_sie();
157	return -EINVAL;
158}
159
160int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
161{
162	int r;
163
164	switch (ext) {
165	case KVM_CAP_S390_PSW:
166	case KVM_CAP_S390_GMAP:
167	case KVM_CAP_SYNC_MMU:
168#ifdef CONFIG_KVM_S390_UCONTROL
169	case KVM_CAP_S390_UCONTROL:
170#endif
171	case KVM_CAP_ASYNC_PF:
172	case KVM_CAP_SYNC_REGS:
173	case KVM_CAP_ONE_REG:
174	case KVM_CAP_ENABLE_CAP:
175	case KVM_CAP_S390_CSS_SUPPORT:
176	case KVM_CAP_IOEVENTFD:
177	case KVM_CAP_DEVICE_CTRL:
178	case KVM_CAP_ENABLE_CAP_VM:
179	case KVM_CAP_S390_IRQCHIP:
180	case KVM_CAP_VM_ATTRIBUTES:
181	case KVM_CAP_MP_STATE:
182	case KVM_CAP_S390_INJECT_IRQ:
183	case KVM_CAP_S390_USER_SIGP:
184	case KVM_CAP_S390_USER_STSI:
185	case KVM_CAP_S390_SKEYS:
186	case KVM_CAP_S390_IRQ_STATE:
187		r = 1;
188		break;
189	case KVM_CAP_S390_MEM_OP:
190		r = MEM_OP_MAX_SIZE;
191		break;
192	case KVM_CAP_NR_VCPUS:
193	case KVM_CAP_MAX_VCPUS:
194		r = KVM_MAX_VCPUS;
195		break;
196	case KVM_CAP_NR_MEMSLOTS:
197		r = KVM_USER_MEM_SLOTS;
198		break;
199	case KVM_CAP_S390_COW:
200		r = MACHINE_HAS_ESOP;
201		break;
202	case KVM_CAP_S390_VECTOR_REGISTERS:
203		r = MACHINE_HAS_VX;
204		break;
205	default:
206		r = 0;
207	}
208	return r;
209}
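/*
 * Illustrative sketch (not kernel code): userspace discovers the
 * capabilities above with KVM_CHECK_EXTENSION on the /dev/kvm or VM file
 * descriptor. For scalar capabilities the return value carries extra
 * information, e.g. KVM_CAP_S390_MEM_OP reports MEM_OP_MAX_SIZE. The fd
 * name below is a placeholder.
 *
 *	int max_memop = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (max_memop > 0)
 *		;	/* KVM_S390_MEM_OP available, up to max_memop bytes */
 */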
210
211static void kvm_s390_sync_dirty_log(struct kvm *kvm,
212					struct kvm_memory_slot *memslot)
213{
214	gfn_t cur_gfn, last_gfn;
215	unsigned long address;
216	struct gmap *gmap = kvm->arch.gmap;
217
218	down_read(&gmap->mm->mmap_sem);
219	/* Loop over all guest pages */
220	last_gfn = memslot->base_gfn + memslot->npages;
221	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
222		address = gfn_to_hva_memslot(memslot, cur_gfn);
223
224		if (gmap_test_and_clear_dirty(address, gmap))
225			mark_page_dirty(kvm, cur_gfn);
226	}
227	up_read(&gmap->mm->mmap_sem);
228}
229
230/* Section: vm related */
231/*
232 * Get (and clear) the dirty memory log for a memory slot.
233 */
234int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
235			       struct kvm_dirty_log *log)
236{
237	int r;
238	unsigned long n;
239	struct kvm_memory_slot *memslot;
240	int is_dirty = 0;
241
242	mutex_lock(&kvm->slots_lock);
243
244	r = -EINVAL;
245	if (log->slot >= KVM_USER_MEM_SLOTS)
246		goto out;
247
248	memslot = id_to_memslot(kvm->memslots, log->slot);
249	r = -ENOENT;
250	if (!memslot->dirty_bitmap)
251		goto out;
252
253	kvm_s390_sync_dirty_log(kvm, memslot);
254	r = kvm_get_dirty_log(kvm, log, &is_dirty);
255	if (r)
256		goto out;
257
258	/* Clear the dirty log */
259	if (is_dirty) {
260		n = kvm_dirty_bitmap_bytes(memslot);
261		memset(memslot->dirty_bitmap, 0, n);
262	}
263	r = 0;
264out:
265	mutex_unlock(&kvm->slots_lock);
266	return r;
267}
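/*
 * Illustrative sketch (not kernel code): a userspace caller retrieves, and
 * thereby clears, the dirty bitmap of one memslot. The buffer must provide
 * one bit per page of the slot; vm_fd, slot 0 and NPAGES are assumptions of
 * the example only.
 *
 *	unsigned long bitmap[NPAGES / BITS_PER_LONG];
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0)
 *		;	/* bitmap now holds the (just cleared) dirty bits */
 */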
268
269static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
270{
271	int r;
272
273	if (cap->flags)
274		return -EINVAL;
275
276	switch (cap->cap) {
277	case KVM_CAP_S390_IRQCHIP:
278		kvm->arch.use_irqchip = 1;
279		r = 0;
280		break;
281	case KVM_CAP_S390_USER_SIGP:
282		kvm->arch.user_sigp = 1;
283		r = 0;
284		break;
285	case KVM_CAP_S390_VECTOR_REGISTERS:
286		mutex_lock(&kvm->lock);
287		if (atomic_read(&kvm->online_vcpus)) {
288			r = -EBUSY;
289		} else if (MACHINE_HAS_VX) {
290			set_kvm_facility(kvm->arch.model.fac->mask, 129);
291			set_kvm_facility(kvm->arch.model.fac->list, 129);
292			r = 0;
293		} else
294			r = -EINVAL;
295		mutex_unlock(&kvm->lock);
296		break;
297	case KVM_CAP_S390_USER_STSI:
298		kvm->arch.user_stsi = 1;
299		r = 0;
300		break;
301	default:
302		r = -EINVAL;
303		break;
304	}
305	return r;
306}
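/*
 * Illustrative sketch (not kernel code): the VM-wide capabilities above are
 * switched on from userspace via KVM_ENABLE_CAP on the VM fd, e.g. to let
 * userspace handle SIGP orders itself. vm_fd is a placeholder.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	int rc = ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *	// rc == 0 on success; KVM_CAP_S390_VECTOR_REGISTERS additionally
 *	// requires that no VCPU has been created yet (-EBUSY otherwise)
 */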
307
308static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
309{
310	int ret;
311
312	switch (attr->attr) {
313	case KVM_S390_VM_MEM_LIMIT_SIZE:
314		ret = 0;
315		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
316			ret = -EFAULT;
317		break;
318	default:
319		ret = -ENXIO;
320		break;
321	}
322	return ret;
323}
324
325static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
326{
327	int ret;
328	unsigned int idx;
329	switch (attr->attr) {
330	case KVM_S390_VM_MEM_ENABLE_CMMA:
331		ret = -EBUSY;
332		mutex_lock(&kvm->lock);
333		if (atomic_read(&kvm->online_vcpus) == 0) {
334			kvm->arch.use_cmma = 1;
335			ret = 0;
336		}
337		mutex_unlock(&kvm->lock);
338		break;
339	case KVM_S390_VM_MEM_CLR_CMMA:
340		mutex_lock(&kvm->lock);
341		idx = srcu_read_lock(&kvm->srcu);
342		s390_reset_cmma(kvm->arch.gmap->mm);
343		srcu_read_unlock(&kvm->srcu, idx);
344		mutex_unlock(&kvm->lock);
345		ret = 0;
346		break;
347	case KVM_S390_VM_MEM_LIMIT_SIZE: {
348		unsigned long new_limit;
349
350		if (kvm_is_ucontrol(kvm))
351			return -EINVAL;
352
353		if (get_user(new_limit, (u64 __user *)attr->addr))
354			return -EFAULT;
355
356		if (new_limit > kvm->arch.gmap->asce_end)
357			return -E2BIG;
358
359		ret = -EBUSY;
360		mutex_lock(&kvm->lock);
361		if (atomic_read(&kvm->online_vcpus) == 0) {
362			/* gmap_alloc will round the limit up */
363			struct gmap *new = gmap_alloc(current->mm, new_limit);
364
365			if (!new) {
366				ret = -ENOMEM;
367			} else {
368				gmap_free(kvm->arch.gmap);
369				new->private = kvm;
370				kvm->arch.gmap = new;
371				ret = 0;
372			}
373		}
374		mutex_unlock(&kvm->lock);
375		break;
376	}
377	default:
378		ret = -ENXIO;
379		break;
380	}
381	return ret;
382}
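/*
 * Illustrative sketch (not kernel code): the memory control attributes are
 * driven through KVM_SET_DEVICE_ATTR on the VM fd. Changing the limit is
 * only accepted before the first VCPU is created; the 16GB value and vm_fd
 * are assumptions of the example.
 *
 *	__u64 limit = 16ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);	// 0, -EBUSY, -E2BIG, ...
 */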
383
384static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
385
386static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
387{
388	struct kvm_vcpu *vcpu;
389	int i;
390
391	if (!test_kvm_facility(kvm, 76))
392		return -EINVAL;
393
394	mutex_lock(&kvm->lock);
395	switch (attr->attr) {
396	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
397		get_random_bytes(
398			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
399			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
400		kvm->arch.crypto.aes_kw = 1;
401		break;
402	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
403		get_random_bytes(
404			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
405			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
406		kvm->arch.crypto.dea_kw = 1;
407		break;
408	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
409		kvm->arch.crypto.aes_kw = 0;
410		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
411			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
412		break;
413	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
414		kvm->arch.crypto.dea_kw = 0;
415		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
416			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
417		break;
418	default:
419		mutex_unlock(&kvm->lock);
420		return -ENXIO;
421	}
422
423	kvm_for_each_vcpu(i, vcpu, kvm) {
424		kvm_s390_vcpu_crypto_setup(vcpu);
425		exit_sie(vcpu);
426	}
427	mutex_unlock(&kvm->lock);
428	return 0;
429}
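/*
 * Illustrative sketch (not kernel code): the key wrapping controls carry no
 * payload, only group/attr are evaluated (and facility 76 must be around).
 * Enabling generates fresh wrapping key masks, disabling clears them, and in
 * both cases all VCPUs are kicked so the new CRYCB state is picked up.
 * vm_fd is a placeholder.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */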
430
431static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
432{
433	u8 gtod_high;
434
435	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
436					   sizeof(gtod_high)))
437		return -EFAULT;
438
439	if (gtod_high != 0)
440		return -EINVAL;
441
442	return 0;
443}
444
445static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
446{
447	struct kvm_vcpu *cur_vcpu;
448	unsigned int vcpu_idx;
449	u64 host_tod, gtod;
450	int r;
451
452	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
453		return -EFAULT;
454
455	r = store_tod_clock(&host_tod);
456	if (r)
457		return r;
458
459	mutex_lock(&kvm->lock);
460	kvm->arch.epoch = gtod - host_tod;
461	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
462		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
463		exit_sie(cur_vcpu);
464	}
465	mutex_unlock(&kvm->lock);
466	return 0;
467}
468
469static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
470{
471	int ret;
472
473	if (attr->flags)
474		return -EINVAL;
475
476	switch (attr->attr) {
477	case KVM_S390_VM_TOD_HIGH:
478		ret = kvm_s390_set_tod_high(kvm, attr);
479		break;
480	case KVM_S390_VM_TOD_LOW:
481		ret = kvm_s390_set_tod_low(kvm, attr);
482		break;
483	default:
484		ret = -ENXIO;
485		break;
486	}
487	return ret;
488}
489
490static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
491{
492	u8 gtod_high = 0;
493
494	if (copy_to_user((void __user *)attr->addr, &gtod_high,
495					 sizeof(gtod_high)))
496		return -EFAULT;
497
498	return 0;
499}
500
501static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
502{
503	u64 host_tod, gtod;
504	int r;
505
506	r = store_tod_clock(&host_tod);
507	if (r)
508		return r;
509
510	gtod = host_tod + kvm->arch.epoch;
511	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
512		return -EFAULT;
513
514	return 0;
515}
516
517static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
518{
519	int ret;
520
521	if (attr->flags)
522		return -EINVAL;
523
524	switch (attr->attr) {
525	case KVM_S390_VM_TOD_HIGH:
526		ret = kvm_s390_get_tod_high(kvm, attr);
527		break;
528	case KVM_S390_VM_TOD_LOW:
529		ret = kvm_s390_get_tod_low(kvm, attr);
530		break;
531	default:
532		ret = -ENXIO;
533		break;
534	}
535	return ret;
536}
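/*
 * Illustrative sketch (not kernel code): the guest TOD is not stored as an
 * absolute value but as an epoch difference, kvm->arch.epoch = gtod -
 * host_tod, which every SIE block applies on top of the running host clock.
 * A later KVM_GET_DEVICE_ATTR therefore returns the value set here advanced
 * by the host time that passed in between. vm_fd is a placeholder.
 *
 *	__u64 gtod = 0;	// requested guest TOD clock value
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */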
537
538static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
539{
540	struct kvm_s390_vm_cpu_processor *proc;
541	int ret = 0;
542
543	mutex_lock(&kvm->lock);
544	if (atomic_read(&kvm->online_vcpus)) {
545		ret = -EBUSY;
546		goto out;
547	}
548	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
549	if (!proc) {
550		ret = -ENOMEM;
551		goto out;
552	}
553	if (!copy_from_user(proc, (void __user *)attr->addr,
554			    sizeof(*proc))) {
555		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
556		       sizeof(struct cpuid));
557		kvm->arch.model.ibc = proc->ibc;
558		memcpy(kvm->arch.model.fac->list, proc->fac_list,
559		       S390_ARCH_FAC_LIST_SIZE_BYTE);
560	} else
561		ret = -EFAULT;
562	kfree(proc);
563out:
564	mutex_unlock(&kvm->lock);
565	return ret;
566}
567
568static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
569{
570	int ret = -ENXIO;
571
572	switch (attr->attr) {
573	case KVM_S390_VM_CPU_PROCESSOR:
574		ret = kvm_s390_set_processor(kvm, attr);
575		break;
576	}
577	return ret;
578}
579
580static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
581{
582	struct kvm_s390_vm_cpu_processor *proc;
583	int ret = 0;
584
585	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
586	if (!proc) {
587		ret = -ENOMEM;
588		goto out;
589	}
590	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
591	proc->ibc = kvm->arch.model.ibc;
592	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
593	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
594		ret = -EFAULT;
595	kfree(proc);
596out:
597	return ret;
598}
599
600static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
601{
602	struct kvm_s390_vm_cpu_machine *mach;
603	int ret = 0;
604
605	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
606	if (!mach) {
607		ret = -ENOMEM;
608		goto out;
609	}
610	get_cpu_id((struct cpuid *) &mach->cpuid);
611	mach->ibc = sclp_get_ibc();
612	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
613	       S390_ARCH_FAC_LIST_SIZE_BYTE);
614	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
615	       S390_ARCH_FAC_LIST_SIZE_BYTE);
616	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
617		ret = -EFAULT;
618	kfree(mach);
619out:
620	return ret;
621}
622
623static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
624{
625	int ret = -ENXIO;
626
627	switch (attr->attr) {
628	case KVM_S390_VM_CPU_PROCESSOR:
629		ret = kvm_s390_get_processor(kvm, attr);
630		break;
631	case KVM_S390_VM_CPU_MACHINE:
632		ret = kvm_s390_get_machine(kvm, attr);
633		break;
634	}
635	return ret;
636}
637
638static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
639{
640	int ret;
641
642	switch (attr->group) {
643	case KVM_S390_VM_MEM_CTRL:
644		ret = kvm_s390_set_mem_control(kvm, attr);
645		break;
646	case KVM_S390_VM_TOD:
647		ret = kvm_s390_set_tod(kvm, attr);
648		break;
649	case KVM_S390_VM_CPU_MODEL:
650		ret = kvm_s390_set_cpu_model(kvm, attr);
651		break;
652	case KVM_S390_VM_CRYPTO:
653		ret = kvm_s390_vm_set_crypto(kvm, attr);
654		break;
655	default:
656		ret = -ENXIO;
657		break;
658	}
659
660	return ret;
661}
662
663static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
664{
665	int ret;
666
667	switch (attr->group) {
668	case KVM_S390_VM_MEM_CTRL:
669		ret = kvm_s390_get_mem_control(kvm, attr);
670		break;
671	case KVM_S390_VM_TOD:
672		ret = kvm_s390_get_tod(kvm, attr);
673		break;
674	case KVM_S390_VM_CPU_MODEL:
675		ret = kvm_s390_get_cpu_model(kvm, attr);
676		break;
677	default:
678		ret = -ENXIO;
679		break;
680	}
681
682	return ret;
683}
684
685static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
686{
687	int ret;
688
689	switch (attr->group) {
690	case KVM_S390_VM_MEM_CTRL:
691		switch (attr->attr) {
692		case KVM_S390_VM_MEM_ENABLE_CMMA:
693		case KVM_S390_VM_MEM_CLR_CMMA:
694		case KVM_S390_VM_MEM_LIMIT_SIZE:
695			ret = 0;
696			break;
697		default:
698			ret = -ENXIO;
699			break;
700		}
701		break;
702	case KVM_S390_VM_TOD:
703		switch (attr->attr) {
704		case KVM_S390_VM_TOD_LOW:
705		case KVM_S390_VM_TOD_HIGH:
706			ret = 0;
707			break;
708		default:
709			ret = -ENXIO;
710			break;
711		}
712		break;
713	case KVM_S390_VM_CPU_MODEL:
714		switch (attr->attr) {
715		case KVM_S390_VM_CPU_PROCESSOR:
716		case KVM_S390_VM_CPU_MACHINE:
717			ret = 0;
718			break;
719		default:
720			ret = -ENXIO;
721			break;
722		}
723		break;
724	case KVM_S390_VM_CRYPTO:
725		switch (attr->attr) {
726		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
727		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
728		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
729		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
730			ret = 0;
731			break;
732		default:
733			ret = -ENXIO;
734			break;
735		}
736		break;
737	default:
738		ret = -ENXIO;
739		break;
740	}
741
742	return ret;
743}
744
745static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
746{
747	uint8_t *keys;
748	uint64_t hva;
749	unsigned long curkey;
750	int i, r = 0;
751
752	if (args->flags != 0)
753		return -EINVAL;
754
755	/* Is this guest using storage keys? */
756	if (!mm_use_skey(current->mm))
757		return KVM_S390_GET_SKEYS_NONE;
758
759	/* Enforce sane limit on memory allocation */
760	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
761		return -EINVAL;
762
763	keys = kmalloc_array(args->count, sizeof(uint8_t),
764			     GFP_KERNEL | __GFP_NOWARN);
765	if (!keys)
766		keys = vmalloc(sizeof(uint8_t) * args->count);
767	if (!keys)
768		return -ENOMEM;
769
770	for (i = 0; i < args->count; i++) {
771		hva = gfn_to_hva(kvm, args->start_gfn + i);
772		if (kvm_is_error_hva(hva)) {
773			r = -EFAULT;
774			goto out;
775		}
776
777		curkey = get_guest_storage_key(current->mm, hva);
778		if (IS_ERR_VALUE(curkey)) {
779			r = curkey;
780			goto out;
781		}
782		keys[i] = curkey;
783	}
784
785	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
786			 sizeof(uint8_t) * args->count);
787	if (r)
788		r = -EFAULT;
789out:
790	kvfree(keys);
791	return r;
792}
793
794static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
795{
796	uint8_t *keys;
797	uint64_t hva;
798	int i, r = 0;
799
800	if (args->flags != 0)
801		return -EINVAL;
802
803	/* Enforce sane limit on memory allocation */
804	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
805		return -EINVAL;
806
807	keys = kmalloc_array(args->count, sizeof(uint8_t),
808			     GFP_KERNEL | __GFP_NOWARN);
809	if (!keys)
810		keys = vmalloc(sizeof(uint8_t) * args->count);
811	if (!keys)
812		return -ENOMEM;
813
814	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
815			   sizeof(uint8_t) * args->count);
816	if (r) {
817		r = -EFAULT;
818		goto out;
819	}
820
821	/* Enable storage key handling for the guest */
822	s390_enable_skey();
823
824	for (i = 0; i < args->count; i++) {
825		hva = gfn_to_hva(kvm, args->start_gfn + i);
826		if (kvm_is_error_hva(hva)) {
827			r = -EFAULT;
828			goto out;
829		}
830
831		/* Lowest order bit is reserved */
832		if (keys[i] & 0x01) {
833			r = -EINVAL;
834			goto out;
835		}
836
837		r = set_guest_storage_key(current->mm, hva,
838					  (unsigned long)keys[i], 0);
839		if (r)
840			goto out;
841	}
842out:
843	kvfree(keys);
844	return r;
845}
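/*
 * Illustrative sketch (not kernel code): storage keys are transferred as one
 * byte per guest page, starting at start_gfn. A get returns
 * KVM_S390_GET_SKEYS_NONE as long as the guest never used its keys. The fd
 * name and the page count are assumptions of the example.
 *
 *	__u8 keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);	// fills keys[]
 *	ioctl(vm_fd, KVM_S390_SET_SKEYS, &args);	// writes them back
 */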
846
847long kvm_arch_vm_ioctl(struct file *filp,
848		       unsigned int ioctl, unsigned long arg)
849{
850	struct kvm *kvm = filp->private_data;
851	void __user *argp = (void __user *)arg;
852	struct kvm_device_attr attr;
853	int r;
854
855	switch (ioctl) {
856	case KVM_S390_INTERRUPT: {
857		struct kvm_s390_interrupt s390int;
858
859		r = -EFAULT;
860		if (copy_from_user(&s390int, argp, sizeof(s390int)))
861			break;
862		r = kvm_s390_inject_vm(kvm, &s390int);
863		break;
864	}
865	case KVM_ENABLE_CAP: {
866		struct kvm_enable_cap cap;
867		r = -EFAULT;
868		if (copy_from_user(&cap, argp, sizeof(cap)))
869			break;
870		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
871		break;
872	}
873	case KVM_CREATE_IRQCHIP: {
874		struct kvm_irq_routing_entry routing;
875
876		r = -EINVAL;
877		if (kvm->arch.use_irqchip) {
878			/* Set up dummy routing. */
879			memset(&routing, 0, sizeof(routing));
880			kvm_set_irq_routing(kvm, &routing, 0, 0);
881			r = 0;
882		}
883		break;
884	}
885	case KVM_SET_DEVICE_ATTR: {
886		r = -EFAULT;
887		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
888			break;
889		r = kvm_s390_vm_set_attr(kvm, &attr);
890		break;
891	}
892	case KVM_GET_DEVICE_ATTR: {
893		r = -EFAULT;
894		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
895			break;
896		r = kvm_s390_vm_get_attr(kvm, &attr);
897		break;
898	}
899	case KVM_HAS_DEVICE_ATTR: {
900		r = -EFAULT;
901		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
902			break;
903		r = kvm_s390_vm_has_attr(kvm, &attr);
904		break;
905	}
906	case KVM_S390_GET_SKEYS: {
907		struct kvm_s390_skeys args;
908
909		r = -EFAULT;
910		if (copy_from_user(&args, argp,
911				   sizeof(struct kvm_s390_skeys)))
912			break;
913		r = kvm_s390_get_skeys(kvm, &args);
914		break;
915	}
916	case KVM_S390_SET_SKEYS: {
917		struct kvm_s390_skeys args;
918
919		r = -EFAULT;
920		if (copy_from_user(&args, argp,
921				   sizeof(struct kvm_s390_skeys)))
922			break;
923		r = kvm_s390_set_skeys(kvm, &args);
924		break;
925	}
926	default:
927		r = -ENOTTY;
928	}
929
930	return r;
931}
932
933static int kvm_s390_query_ap_config(u8 *config)
934{
935	u32 fcn_code = 0x04000000UL;
936	u32 cc = 0;
937
938	memset(config, 0, 128);
939	asm volatile(
940		"lgr 0,%1\n"
941		"lgr 2,%2\n"
942		".long 0xb2af0000\n"		/* PQAP(QCI) */
943		"0: ipm %0\n"
944		"srl %0,28\n"
945		"1:\n"
946		EX_TABLE(0b, 1b)
947		: "+r" (cc)
948		: "r" (fcn_code), "r" (config)
949		: "cc", "0", "2", "memory"
950	);
951
952	return cc;
953}
954
955static int kvm_s390_apxa_installed(void)
956{
957	u8 config[128];
958	int cc;
959
960	if (test_facility(2) && test_facility(12)) {
961		cc = kvm_s390_query_ap_config(config);
962
963		if (cc)
964			pr_err("PQAP(QCI) failed with cc=%d", cc);
965		else
966			return config[0] & 0x40;
967	}
968
969	return 0;
970}
971
972static void kvm_s390_set_crycb_format(struct kvm *kvm)
973{
974	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
975
976	if (kvm_s390_apxa_installed())
977		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
978	else
979		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
980}
981
982static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
983{
984	get_cpu_id(cpu_id);
985	cpu_id->version = 0xff;
986}
987
988static int kvm_s390_crypto_init(struct kvm *kvm)
989{
990	if (!test_kvm_facility(kvm, 76))
991		return 0;
992
993	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
994					 GFP_KERNEL | GFP_DMA);
995	if (!kvm->arch.crypto.crycb)
996		return -ENOMEM;
997
998	kvm_s390_set_crycb_format(kvm);
999
1000	/* Enable AES/DEA protected key functions by default */
1001	kvm->arch.crypto.aes_kw = 1;
1002	kvm->arch.crypto.dea_kw = 1;
1003	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1004			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1005	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1006			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1007
1008	return 0;
1009}
1010
1011int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1012{
1013	int i, rc;
1014	char debug_name[16];
1015	static unsigned long sca_offset;
1016
1017	rc = -EINVAL;
1018#ifdef CONFIG_KVM_S390_UCONTROL
1019	if (type & ~KVM_VM_S390_UCONTROL)
1020		goto out_err;
1021	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1022		goto out_err;
1023#else
1024	if (type)
1025		goto out_err;
1026#endif
1027
1028	rc = s390_enable_sie();
1029	if (rc)
1030		goto out_err;
1031
1032	rc = -ENOMEM;
1033
1034	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1035	if (!kvm->arch.sca)
1036		goto out_err;
1037	spin_lock(&kvm_lock);
1038	sca_offset += 16;
1039	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1040		sca_offset = 0;
1041	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1042	spin_unlock(&kvm_lock);
1043
1044	sprintf(debug_name, "kvm-%u", current->pid);
1045
1046	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
1047	if (!kvm->arch.dbf)
1048		goto out_err;
1049
	/*
	 * The architectural maximum number of facility bits is 16 kbit,
	 * which takes 2 kbyte of memory to store. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and
	 * the facility mask (arch.model.fac->mask). Its address has to fit
	 * into 31 bits and be word aligned.
	 */
1057	kvm->arch.model.fac =
1058		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1059	if (!kvm->arch.model.fac)
1060		goto out_err;
1061
1062	/* Populate the facility mask initially. */
1063	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1064	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1065	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1066		if (i < kvm_s390_fac_list_mask_size())
1067			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1068		else
1069			kvm->arch.model.fac->mask[i] = 0UL;
1070	}
1071
1072	/* Populate the facility list initially. */
1073	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1074	       S390_ARCH_FAC_LIST_SIZE_BYTE);
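	/*
	 * Illustration (a reading aid, not extra logic): facility bits are
	 * numbered from the leftmost bit of the list, so facility nr lives
	 * in byte (nr >> 3) under bit mask (0x80 >> (nr & 7)). E.g.
	 * facility 76, which guards the crypto setup below, must be present
	 * in both fac->mask and fac->list for test_kvm_facility() to
	 * report it to the rest of the code.
	 */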
1075
1076	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1077	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
1078
1079	if (kvm_s390_crypto_init(kvm) < 0)
1080		goto out_err;
1081
1082	spin_lock_init(&kvm->arch.float_int.lock);
1083	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1084		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1085	init_waitqueue_head(&kvm->arch.ipte_wq);
1086	mutex_init(&kvm->arch.ipte_mutex);
1087
1088	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1089	VM_EVENT(kvm, 3, "%s", "vm created");
1090
1091	if (type & KVM_VM_S390_UCONTROL) {
1092		kvm->arch.gmap = NULL;
1093	} else {
1094		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1095		if (!kvm->arch.gmap)
1096			goto out_err;
1097		kvm->arch.gmap->private = kvm;
1098		kvm->arch.gmap->pfault_enabled = 0;
1099	}
1100
1101	kvm->arch.css_support = 0;
1102	kvm->arch.use_irqchip = 0;
1103	kvm->arch.epoch = 0;
1104
1105	spin_lock_init(&kvm->arch.start_stop_lock);
1106
1107	return 0;
1108out_err:
1109	kfree(kvm->arch.crypto.crycb);
1110	free_page((unsigned long)kvm->arch.model.fac);
1111	debug_unregister(kvm->arch.dbf);
1112	free_page((unsigned long)(kvm->arch.sca));
1113	return rc;
1114}
1115
1116void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1117{
1118	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1119	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1120	kvm_s390_clear_local_irqs(vcpu);
1121	kvm_clear_async_pf_completion_queue(vcpu);
1122	if (!kvm_is_ucontrol(vcpu->kvm)) {
1123		clear_bit(63 - vcpu->vcpu_id,
1124			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1125		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1126		    (__u64) vcpu->arch.sie_block)
1127			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1128	}
1129	smp_mb();
1130
1131	if (kvm_is_ucontrol(vcpu->kvm))
1132		gmap_free(vcpu->arch.gmap);
1133
1134	if (kvm_s390_cmma_enabled(vcpu->kvm))
1135		kvm_s390_vcpu_unsetup_cmma(vcpu);
1136	free_page((unsigned long)(vcpu->arch.sie_block));
1137
1138	kvm_vcpu_uninit(vcpu);
1139	kmem_cache_free(kvm_vcpu_cache, vcpu);
1140}
1141
1142static void kvm_free_vcpus(struct kvm *kvm)
1143{
1144	unsigned int i;
1145	struct kvm_vcpu *vcpu;
1146
1147	kvm_for_each_vcpu(i, vcpu, kvm)
1148		kvm_arch_vcpu_destroy(vcpu);
1149
1150	mutex_lock(&kvm->lock);
1151	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1152		kvm->vcpus[i] = NULL;
1153
1154	atomic_set(&kvm->online_vcpus, 0);
1155	mutex_unlock(&kvm->lock);
1156}
1157
1158void kvm_arch_destroy_vm(struct kvm *kvm)
1159{
1160	kvm_free_vcpus(kvm);
1161	free_page((unsigned long)kvm->arch.model.fac);
1162	free_page((unsigned long)(kvm->arch.sca));
1163	debug_unregister(kvm->arch.dbf);
1164	kfree(kvm->arch.crypto.crycb);
1165	if (!kvm_is_ucontrol(kvm))
1166		gmap_free(kvm->arch.gmap);
1167	kvm_s390_destroy_adapters(kvm);
1168	kvm_s390_clear_float_irqs(kvm);
1169}
1170
1171/* Section: vcpu related */
1172static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1173{
1174	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1175	if (!vcpu->arch.gmap)
1176		return -ENOMEM;
1177	vcpu->arch.gmap->private = vcpu->kvm;
1178
1179	return 0;
1180}
1181
1182int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1183{
1184	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1185	kvm_clear_async_pf_completion_queue(vcpu);
1186	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1187				    KVM_SYNC_GPRS |
1188				    KVM_SYNC_ACRS |
1189				    KVM_SYNC_CRS |
1190				    KVM_SYNC_ARCH0 |
1191				    KVM_SYNC_PFAULT;
1192	if (test_kvm_facility(vcpu->kvm, 129))
1193		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1194
1195	if (kvm_is_ucontrol(vcpu->kvm))
1196		return __kvm_ucontrol_vcpu_init(vcpu);
1197
1198	return 0;
1199}
1200
1201void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1202{
1203	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
1204	if (test_kvm_facility(vcpu->kvm, 129))
1205		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
1206	else
1207		save_fp_regs(vcpu->arch.host_fpregs.fprs);
1208	save_access_regs(vcpu->arch.host_acrs);
1209	if (test_kvm_facility(vcpu->kvm, 129)) {
1210		restore_fp_ctl(&vcpu->run->s.regs.fpc);
1211		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
1212	} else {
1213		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1214		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
1215	}
1216	restore_access_regs(vcpu->run->s.regs.acrs);
1217	gmap_enable(vcpu->arch.gmap);
1218	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1219}
1220
1221void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1222{
1223	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1224	gmap_disable(vcpu->arch.gmap);
1225	if (test_kvm_facility(vcpu->kvm, 129)) {
1226		save_fp_ctl(&vcpu->run->s.regs.fpc);
1227		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
1228	} else {
1229		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1230		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
1231	}
1232	save_access_regs(vcpu->run->s.regs.acrs);
1233	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
1234	if (test_kvm_facility(vcpu->kvm, 129))
1235		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
1236	else
1237		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
1238	restore_access_regs(vcpu->arch.host_acrs);
1239}
1240
1241static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1242{
	/* this equals the initial cpu reset in the POP, but we don't switch to ESA */
1244	vcpu->arch.sie_block->gpsw.mask = 0UL;
1245	vcpu->arch.sie_block->gpsw.addr = 0UL;
1246	kvm_s390_set_prefix(vcpu, 0);
1247	vcpu->arch.sie_block->cputm     = 0UL;
1248	vcpu->arch.sie_block->ckc       = 0UL;
1249	vcpu->arch.sie_block->todpr     = 0;
1250	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1251	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1252	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1253	vcpu->arch.guest_fpregs.fpc = 0;
1254	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1255	vcpu->arch.sie_block->gbea = 1;
1256	vcpu->arch.sie_block->pp = 0;
1257	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1258	kvm_clear_async_pf_completion_queue(vcpu);
1259	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1260		kvm_s390_vcpu_stop(vcpu);
1261	kvm_s390_clear_local_irqs(vcpu);
1262}
1263
1264void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1265{
1266	mutex_lock(&vcpu->kvm->lock);
1267	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1268	mutex_unlock(&vcpu->kvm->lock);
1269	if (!kvm_is_ucontrol(vcpu->kvm))
1270		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1271}
1272
1273static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1274{
1275	if (!test_kvm_facility(vcpu->kvm, 76))
1276		return;
1277
1278	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1279
1280	if (vcpu->kvm->arch.crypto.aes_kw)
1281		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1282	if (vcpu->kvm->arch.crypto.dea_kw)
1283		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1284
1285	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1286}
1287
1288void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1289{
1290	free_page(vcpu->arch.sie_block->cbrlo);
1291	vcpu->arch.sie_block->cbrlo = 0;
1292}
1293
1294int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1295{
1296	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1297	if (!vcpu->arch.sie_block->cbrlo)
1298		return -ENOMEM;
1299
1300	vcpu->arch.sie_block->ecb2 |= 0x80;
1301	vcpu->arch.sie_block->ecb2 &= ~0x08;
1302	return 0;
1303}
1304
1305static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1306{
1307	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1308
1309	vcpu->arch.cpu_id = model->cpu_id;
1310	vcpu->arch.sie_block->ibc = model->ibc;
1311	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1312}
1313
1314int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1315{
1316	int rc = 0;
1317
1318	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1319						    CPUSTAT_SM |
1320						    CPUSTAT_STOPPED |
1321						    CPUSTAT_GED);
1322	kvm_s390_vcpu_setup_model(vcpu);
1323
1324	vcpu->arch.sie_block->ecb   = 6;
1325	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1326		vcpu->arch.sie_block->ecb |= 0x10;
1327
1328	vcpu->arch.sie_block->ecb2  = 8;
1329	vcpu->arch.sie_block->eca   = 0xC1002000U;
1330	if (sclp_has_siif())
1331		vcpu->arch.sie_block->eca |= 1;
1332	if (sclp_has_sigpif())
1333		vcpu->arch.sie_block->eca |= 0x10000000U;
1334	if (test_kvm_facility(vcpu->kvm, 129)) {
1335		vcpu->arch.sie_block->eca |= 0x00020000;
1336		vcpu->arch.sie_block->ecd |= 0x20000000;
1337	}
1338	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1339
1340	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
1341		rc = kvm_s390_vcpu_setup_cmma(vcpu);
1342		if (rc)
1343			return rc;
1344	}
1345	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1346	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1347
1348	kvm_s390_vcpu_crypto_setup(vcpu);
1349
1350	return rc;
1351}
1352
1353struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1354				      unsigned int id)
1355{
1356	struct kvm_vcpu *vcpu;
1357	struct sie_page *sie_page;
1358	int rc = -EINVAL;
1359
1360	if (id >= KVM_MAX_VCPUS)
1361		goto out;
1362
1363	rc = -ENOMEM;
1364
1365	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1366	if (!vcpu)
1367		goto out;
1368
1369	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1370	if (!sie_page)
1371		goto out_free_cpu;
1372
1373	vcpu->arch.sie_block = &sie_page->sie_block;
1374	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1375	vcpu->arch.host_vregs = &sie_page->vregs;
1376
1377	vcpu->arch.sie_block->icpua = id;
1378	if (!kvm_is_ucontrol(kvm)) {
1379		if (!kvm->arch.sca) {
1380			WARN_ON_ONCE(1);
1381			goto out_free_cpu;
1382		}
1383		if (!kvm->arch.sca->cpu[id].sda)
1384			kvm->arch.sca->cpu[id].sda =
1385				(__u64) vcpu->arch.sie_block;
1386		vcpu->arch.sie_block->scaoh =
1387			(__u32)(((__u64)kvm->arch.sca) >> 32);
1388		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1389		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1390	}
1391
1392	spin_lock_init(&vcpu->arch.local_int.lock);
1393	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1394	vcpu->arch.local_int.wq = &vcpu->wq;
1395	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1396
1397	rc = kvm_vcpu_init(vcpu, kvm, id);
1398	if (rc)
1399		goto out_free_sie_block;
1400	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1401		 vcpu->arch.sie_block);
1402	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1403
1404	return vcpu;
1405out_free_sie_block:
1406	free_page((unsigned long)(vcpu->arch.sie_block));
1407out_free_cpu:
1408	kmem_cache_free(kvm_vcpu_cache, vcpu);
1409out:
1410	return ERR_PTR(rc);
1411}
1412
1413int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1414{
1415	return kvm_s390_vcpu_has_irq(vcpu, 0);
1416}
1417
1418void s390_vcpu_block(struct kvm_vcpu *vcpu)
1419{
1420	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1421}
1422
1423void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1424{
1425	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1426}
1427
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle), the function will
 * return immediately.
 */
1432void exit_sie(struct kvm_vcpu *vcpu)
1433{
1434	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1435	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1436		cpu_relax();
1437}
1438
1439/* Kick a guest cpu out of SIE and prevent SIE-reentry */
1440void exit_sie_sync(struct kvm_vcpu *vcpu)
1441{
1442	s390_vcpu_block(vcpu);
1443	exit_sie(vcpu);
1444}
1445
1446static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1447{
1448	int i;
1449	struct kvm *kvm = gmap->private;
1450	struct kvm_vcpu *vcpu;
1451
1452	kvm_for_each_vcpu(i, vcpu, kvm) {
1453		/* match against both prefix pages */
1454		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1455			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1456			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
1457			exit_sie_sync(vcpu);
1458		}
1459	}
1460}
1461
1462int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1463{
1464	/* kvm common code refers to this, but never calls it */
1465	BUG();
1466	return 0;
1467}
1468
1469static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1470					   struct kvm_one_reg *reg)
1471{
1472	int r = -EINVAL;
1473
1474	switch (reg->id) {
1475	case KVM_REG_S390_TODPR:
1476		r = put_user(vcpu->arch.sie_block->todpr,
1477			     (u32 __user *)reg->addr);
1478		break;
1479	case KVM_REG_S390_EPOCHDIFF:
1480		r = put_user(vcpu->arch.sie_block->epoch,
1481			     (u64 __user *)reg->addr);
1482		break;
1483	case KVM_REG_S390_CPU_TIMER:
1484		r = put_user(vcpu->arch.sie_block->cputm,
1485			     (u64 __user *)reg->addr);
1486		break;
1487	case KVM_REG_S390_CLOCK_COMP:
1488		r = put_user(vcpu->arch.sie_block->ckc,
1489			     (u64 __user *)reg->addr);
1490		break;
1491	case KVM_REG_S390_PFTOKEN:
1492		r = put_user(vcpu->arch.pfault_token,
1493			     (u64 __user *)reg->addr);
1494		break;
1495	case KVM_REG_S390_PFCOMPARE:
1496		r = put_user(vcpu->arch.pfault_compare,
1497			     (u64 __user *)reg->addr);
1498		break;
1499	case KVM_REG_S390_PFSELECT:
1500		r = put_user(vcpu->arch.pfault_select,
1501			     (u64 __user *)reg->addr);
1502		break;
1503	case KVM_REG_S390_PP:
1504		r = put_user(vcpu->arch.sie_block->pp,
1505			     (u64 __user *)reg->addr);
1506		break;
1507	case KVM_REG_S390_GBEA:
1508		r = put_user(vcpu->arch.sie_block->gbea,
1509			     (u64 __user *)reg->addr);
1510		break;
1511	default:
1512		break;
1513	}
1514
1515	return r;
1516}
1517
1518static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1519					   struct kvm_one_reg *reg)
1520{
1521	int r = -EINVAL;
1522
1523	switch (reg->id) {
1524	case KVM_REG_S390_TODPR:
1525		r = get_user(vcpu->arch.sie_block->todpr,
1526			     (u32 __user *)reg->addr);
1527		break;
1528	case KVM_REG_S390_EPOCHDIFF:
1529		r = get_user(vcpu->arch.sie_block->epoch,
1530			     (u64 __user *)reg->addr);
1531		break;
1532	case KVM_REG_S390_CPU_TIMER:
1533		r = get_user(vcpu->arch.sie_block->cputm,
1534			     (u64 __user *)reg->addr);
1535		break;
1536	case KVM_REG_S390_CLOCK_COMP:
1537		r = get_user(vcpu->arch.sie_block->ckc,
1538			     (u64 __user *)reg->addr);
1539		break;
1540	case KVM_REG_S390_PFTOKEN:
1541		r = get_user(vcpu->arch.pfault_token,
1542			     (u64 __user *)reg->addr);
1543		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1544			kvm_clear_async_pf_completion_queue(vcpu);
1545		break;
1546	case KVM_REG_S390_PFCOMPARE:
1547		r = get_user(vcpu->arch.pfault_compare,
1548			     (u64 __user *)reg->addr);
1549		break;
1550	case KVM_REG_S390_PFSELECT:
1551		r = get_user(vcpu->arch.pfault_select,
1552			     (u64 __user *)reg->addr);
1553		break;
1554	case KVM_REG_S390_PP:
1555		r = get_user(vcpu->arch.sie_block->pp,
1556			     (u64 __user *)reg->addr);
1557		break;
1558	case KVM_REG_S390_GBEA:
1559		r = get_user(vcpu->arch.sie_block->gbea,
1560			     (u64 __user *)reg->addr);
1561		break;
1562	default:
1563		break;
1564	}
1565
1566	return r;
1567}
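/*
 * Illustrative sketch (not kernel code): the ONE_REG interface above moves
 * single 32/64 bit values between userspace and the SIE block, e.g. the CPU
 * timer during migration. vcpu_fd is a placeholder.
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	// read
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);	// write back
 */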
1568
1569static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1570{
1571	kvm_s390_vcpu_initial_reset(vcpu);
1572	return 0;
1573}
1574
1575int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1576{
1577	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1578	return 0;
1579}
1580
1581int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1582{
1583	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1584	return 0;
1585}
1586
1587int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1588				  struct kvm_sregs *sregs)
1589{
1590	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1591	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1592	restore_access_regs(vcpu->run->s.regs.acrs);
1593	return 0;
1594}
1595
1596int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1597				  struct kvm_sregs *sregs)
1598{
1599	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1600	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1601	return 0;
1602}
1603
1604int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1605{
1606	if (test_fp_ctl(fpu->fpc))
1607		return -EINVAL;
1608	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1609	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1610	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1611	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
1612	return 0;
1613}
1614
1615int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1616{
1617	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1618	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1619	return 0;
1620}
1621
1622static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1623{
1624	int rc = 0;
1625
1626	if (!is_vcpu_stopped(vcpu))
1627		rc = -EBUSY;
1628	else {
1629		vcpu->run->psw_mask = psw.mask;
1630		vcpu->run->psw_addr = psw.addr;
1631	}
1632	return rc;
1633}
1634
1635int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1636				  struct kvm_translation *tr)
1637{
1638	return -EINVAL; /* not implemented yet */
1639}
1640
1641#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1642			      KVM_GUESTDBG_USE_HW_BP | \
1643			      KVM_GUESTDBG_ENABLE)
1644
1645int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1646					struct kvm_guest_debug *dbg)
1647{
1648	int rc = 0;
1649
1650	vcpu->guest_debug = 0;
1651	kvm_s390_clear_bp_data(vcpu);
1652
1653	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1654		return -EINVAL;
1655
1656	if (dbg->control & KVM_GUESTDBG_ENABLE) {
1657		vcpu->guest_debug = dbg->control;
1658		/* enforce guest PER */
1659		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1660
1661		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1662			rc = kvm_s390_import_bp_data(vcpu, dbg);
1663	} else {
1664		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1665		vcpu->arch.guestdbg.last_bp = 0;
1666	}
1667
1668	if (rc) {
1669		vcpu->guest_debug = 0;
1670		kvm_s390_clear_bp_data(vcpu);
1671		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1672	}
1673
1674	return rc;
1675}
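/*
 * Illustrative sketch (not kernel code): a debugger enables PER based
 * single-stepping of the guest with KVM_SET_GUEST_DEBUG; calling it again
 * without KVM_GUESTDBG_ENABLE in .control switches it off. vcpu_fd is a
 * placeholder.
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */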
1676
1677int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1678				    struct kvm_mp_state *mp_state)
1679{
1680	/* CHECK_STOP and LOAD are not supported yet */
1681	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1682				       KVM_MP_STATE_OPERATING;
1683}
1684
1685int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1686				    struct kvm_mp_state *mp_state)
1687{
1688	int rc = 0;
1689
1690	/* user space knows about this interface - let it control the state */
1691	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1692
1693	switch (mp_state->mp_state) {
1694	case KVM_MP_STATE_STOPPED:
1695		kvm_s390_vcpu_stop(vcpu);
1696		break;
1697	case KVM_MP_STATE_OPERATING:
1698		kvm_s390_vcpu_start(vcpu);
1699		break;
1700	case KVM_MP_STATE_LOAD:
1701	case KVM_MP_STATE_CHECK_STOP:
1702		/* fall through - CHECK_STOP and LOAD are not supported yet */
1703	default:
1704		rc = -ENXIO;
1705	}
1706
1707	return rc;
1708}
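/*
 * Illustrative sketch (not kernel code): once userspace touches the MP state
 * interface it owns the stopped/operating transitions, e.g. to stop a VCPU
 * before migration. vcpu_fd is a placeholder.
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */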
1709
1710bool kvm_s390_cmma_enabled(struct kvm *kvm)
1711{
1712	if (!MACHINE_IS_LPAR)
1713		return false;
1714	/* only enable for z10 and later */
1715	if (!MACHINE_HAS_EDAT1)
1716		return false;
1717	if (!kvm->arch.use_cmma)
1718		return false;
1719	return true;
1720}
1721
1722static bool ibs_enabled(struct kvm_vcpu *vcpu)
1723{
1724	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1725}
1726
1727static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1728{
1729retry:
1730	s390_vcpu_unblock(vcpu);
1731	/*
1732	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1733	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1734	 * This ensures that the ipte instruction for this request has
1735	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
1737	 */
1738	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1739		int rc;
1740		rc = gmap_ipte_notify(vcpu->arch.gmap,
1741				      kvm_s390_get_prefix(vcpu),
1742				      PAGE_SIZE * 2);
1743		if (rc)
1744			return rc;
1745		goto retry;
1746	}
1747
1748	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1749		vcpu->arch.sie_block->ihcpu = 0xffff;
1750		goto retry;
1751	}
1752
1753	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1754		if (!ibs_enabled(vcpu)) {
1755			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1756			atomic_set_mask(CPUSTAT_IBS,
1757					&vcpu->arch.sie_block->cpuflags);
1758		}
1759		goto retry;
1760	}
1761
1762	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1763		if (ibs_enabled(vcpu)) {
1764			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1765			atomic_clear_mask(CPUSTAT_IBS,
1766					  &vcpu->arch.sie_block->cpuflags);
1767		}
1768		goto retry;
1769	}
1770
1771	/* nothing to do, just clear the request */
1772	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1773
1774	return 0;
1775}
1776
1777/**
1778 * kvm_arch_fault_in_page - fault-in guest page if necessary
1779 * @vcpu: The corresponding virtual cpu
1780 * @gpa: Guest physical address
1781 * @writable: Whether the page should be writable or not
1782 *
1783 * Make sure that a guest page has been faulted-in on the host.
1784 *
1785 * Return: Zero on success, negative error code otherwise.
1786 */
1787long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1788{
1789	return gmap_fault(vcpu->arch.gmap, gpa,
1790			  writable ? FAULT_FLAG_WRITE : 0);
1791}
1792
1793static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1794				      unsigned long token)
1795{
1796	struct kvm_s390_interrupt inti;
1797	struct kvm_s390_irq irq;
1798
1799	if (start_token) {
1800		irq.u.ext.ext_params2 = token;
1801		irq.type = KVM_S390_INT_PFAULT_INIT;
1802		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1803	} else {
1804		inti.type = KVM_S390_INT_PFAULT_DONE;
1805		inti.parm64 = token;
1806		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1807	}
1808}
1809
1810void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1811				     struct kvm_async_pf *work)
1812{
1813	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1814	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1815}
1816
1817void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1818				 struct kvm_async_pf *work)
1819{
1820	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1821	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1822}
1823
1824void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1825			       struct kvm_async_pf *work)
1826{
1827	/* s390 will always inject the page directly */
1828}
1829
1830bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1831{
1832	/*
1833	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
1835	 */
1836	return true;
1837}
1838
1839static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1840{
1841	hva_t hva;
1842	struct kvm_arch_async_pf arch;
1843	int rc;
1844
1845	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1846		return 0;
1847	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1848	    vcpu->arch.pfault_compare)
1849		return 0;
1850	if (psw_extint_disabled(vcpu))
1851		return 0;
1852	if (kvm_s390_vcpu_has_irq(vcpu, 0))
1853		return 0;
1854	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1855		return 0;
1856	if (!vcpu->arch.gmap->pfault_enabled)
1857		return 0;
1858
1859	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1860	hva += current->thread.gmap_addr & ~PAGE_MASK;
1861	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1862		return 0;
1863
1864	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1865	return rc;
1866}
1867
1868static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1869{
1870	int rc, cpuflags;
1871
	/*
	 * On s390, notifications for arriving pages will be delivered
	 * directly to the guest, but the housekeeping for completed pfaults
	 * is handled outside the worker.
	 */
1877	kvm_check_async_pf_completion(vcpu);
1878
1879	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
1880
1881	if (need_resched())
1882		schedule();
1883
1884	if (test_cpu_flag(CIF_MCCK_PENDING))
1885		s390_handle_mcck();
1886
1887	if (!kvm_is_ucontrol(vcpu->kvm)) {
1888		rc = kvm_s390_deliver_pending_interrupts(vcpu);
1889		if (rc)
1890			return rc;
1891	}
1892
1893	rc = kvm_s390_handle_requests(vcpu);
1894	if (rc)
1895		return rc;
1896
1897	if (guestdbg_enabled(vcpu)) {
1898		kvm_s390_backup_guest_per_regs(vcpu);
1899		kvm_s390_patch_guest_per_regs(vcpu);
1900	}
1901
1902	vcpu->arch.sie_block->icptcode = 0;
1903	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
1904	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
1905	trace_kvm_s390_sie_enter(vcpu, cpuflags);
1906
1907	return 0;
1908}
1909
1910static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
1911{
1912	psw_t *psw = &vcpu->arch.sie_block->gpsw;
1913	u8 opcode;
1914	int rc;
1915
1916	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
1917	trace_kvm_s390_sie_fault(vcpu);
1918
1919	/*
1920	 * We want to inject an addressing exception, which is defined as a
1921	 * suppressing or terminating exception. However, since we came here
1922	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we have to
	 * look up the current opcode to determine the instruction length
	 * and forward the PSW past it.
1926	 */
1927	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
1928	if (rc)
1929		return kvm_s390_inject_prog_cond(vcpu, rc);
1930	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
1931
1932	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
1933}
1934
1935static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
1936{
1937	int rc = -1;
1938
1939	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
1940		   vcpu->arch.sie_block->icptcode);
1941	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
1942
1943	if (guestdbg_enabled(vcpu))
1944		kvm_s390_restore_guest_per_regs(vcpu);
1945
1946	if (exit_reason >= 0) {
1947		rc = 0;
1948	} else if (kvm_is_ucontrol(vcpu->kvm)) {
1949		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
1950		vcpu->run->s390_ucontrol.trans_exc_code =
1951						current->thread.gmap_addr;
1952		vcpu->run->s390_ucontrol.pgm_code = 0x10;
1953		rc = -EREMOTE;
1954
1955	} else if (current->thread.gmap_pfault) {
1956		trace_kvm_s390_major_guest_pfault(vcpu);
1957		current->thread.gmap_pfault = 0;
1958		if (kvm_arch_setup_async_pf(vcpu)) {
1959			rc = 0;
1960		} else {
1961			gpa_t gpa = current->thread.gmap_addr;
1962			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
1963		}
1964	}
1965
1966	if (rc == -1)
1967		rc = vcpu_post_run_fault_in_sie(vcpu);
1968
1969	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
1970
1971	if (rc == 0) {
1972		if (kvm_is_ucontrol(vcpu->kvm))
1973			/* Don't exit for host interrupts. */
1974			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
1975		else
1976			rc = kvm_handle_sie_intercept(vcpu);
1977	}
1978
1979	return rc;
1980}
1981
1982static int __vcpu_run(struct kvm_vcpu *vcpu)
1983{
1984	int rc, exit_reason;
1985
	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are
	 * protected.
	 */
1990	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1991
1992	do {
1993		rc = vcpu_pre_run(vcpu);
1994		if (rc)
1995			break;
1996
1997		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1998		/*
1999		 * As PF_VCPU will be used in the fault handler, there must be
2000		 * no uaccess between guest_enter and guest_exit.
2001		 */
2002		preempt_disable();
2003		kvm_guest_enter();
2004		preempt_enable();
2005		exit_reason = sie64a(vcpu->arch.sie_block,
2006				     vcpu->run->s.regs.gprs);
2007		kvm_guest_exit();
2008		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2009
2010		rc = vcpu_post_run(vcpu, exit_reason);
2011	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2012
2013	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2014	return rc;
2015}
2016
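/*
 * Copy the register state that userspace marked dirty in kvm_run into the
 * SIE control block and vcpu->arch before (re)entering the guest.
 */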
2017static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2018{
2019	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2020	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2021	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2022		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2023	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2024		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2025		/* some control register changes require a tlb flush */
2026		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2027	}
2028	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2029		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2030		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2031		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2032		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2033		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2034	}
2035	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2036		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2037		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2038		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2039		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2040			kvm_clear_async_pf_completion_queue(vcpu);
2041	}
2042	kvm_run->kvm_dirty_regs = 0;
2043}
2044
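/*
 * Counterpart of sync_regs(): copy the guest register state from the
 * SIE control block and vcpu->arch back into kvm_run for userspace.
 */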
2045static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2046{
2047	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2048	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2049	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2050	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2051	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2052	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2053	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2054	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2055	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2056	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2057	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2058	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2059}
2060
2061int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2062{
2063	int rc;
2064	sigset_t sigsaved;
2065
2066	if (guestdbg_exit_pending(vcpu)) {
2067		kvm_s390_prepare_debug_exit(vcpu);
2068		return 0;
2069	}
2070
2071	if (vcpu->sigset_active)
2072		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2073
2074	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2075		kvm_s390_vcpu_start(vcpu);
2076	} else if (is_vcpu_stopped(vcpu)) {
2077		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
2078				   vcpu->vcpu_id);
2079		return -EINVAL;
2080	}
2081
2082	sync_regs(vcpu, kvm_run);
2083
2084	might_fault();
2085	rc = __vcpu_run(vcpu);
2086
2087	if (signal_pending(current) && !rc) {
2088		kvm_run->exit_reason = KVM_EXIT_INTR;
2089		rc = -EINTR;
2090	}
2091
2092	if (guestdbg_exit_pending(vcpu) && !rc)  {
2093		kvm_s390_prepare_debug_exit(vcpu);
2094		rc = 0;
2095	}
2096
2097	if (rc == -EOPNOTSUPP) {
2098		/* intercept cannot be handled in-kernel, prepare kvm_run */
2099		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
2100		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2101		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
2102		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
2103		rc = 0;
2104	}
2105
2106	if (rc == -EREMOTE) {
2107		/* intercept was handled, but userspace support is needed;
2108		 * kvm_run has been prepared by the handler */
2109		rc = 0;
2110	}
2111
2112	store_regs(vcpu, kvm_run);
2113
2114	if (vcpu->sigset_active)
2115		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2116
2117	vcpu->stat.exit_userspace++;
2118	return rc;
2119}
2120
2121/*
2122 * store status at address
2123 * we have two special cases:
2124 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2125 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2126 */
2127int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2128{
2129	unsigned char archmode = 1;
2130	unsigned int px;
2131	u64 clkcomp;
2132	int rc;
2133
2134	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2135		if (write_guest_abs(vcpu, 163, &archmode, 1))
2136			return -EFAULT;
2137		gpa = SAVE_AREA_BASE;
2138	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2139		if (write_guest_real(vcpu, 163, &archmode, 1))
2140			return -EFAULT;
2141		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2142	}
2143	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2144			     vcpu->arch.guest_fpregs.fprs, 128);
2145	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2146			      vcpu->run->s.regs.gprs, 128);
2147	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2148			      &vcpu->arch.sie_block->gpsw, 16);
2149	px = kvm_s390_get_prefix(vcpu);
2150	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2151			      &px, 4);
2152	rc |= write_guest_abs(vcpu,
2153			      gpa + offsetof(struct save_area, fp_ctrl_reg),
2154			      &vcpu->arch.guest_fpregs.fpc, 4);
2155	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2156			      &vcpu->arch.sie_block->todpr, 4);
2157	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2158			      &vcpu->arch.sie_block->cputm, 8);
2159	clkcomp = vcpu->arch.sie_block->ckc >> 8;
2160	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2161			      &clkcomp, 8);
2162	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2163			      &vcpu->run->s.regs.acrs, 64);
2164	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2165			      &vcpu->arch.sie_block->gcr, 128);
2166	return rc ? -EFAULT : 0;
2167}
2168
2169int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2170{
2171	/*
2172	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2173	 * copying in vcpu load/put. Let's update our copies before we save
2174	 * them into the save area.
2175	 */
2176	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
2177	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
2178	save_access_regs(vcpu->run->s.regs.acrs);
2179
2180	return kvm_s390_store_status_unloaded(vcpu, addr);
2181}
2182
2183/*
2184 * store additional status (vector registers) at address
2185 */
2186int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2187					unsigned long gpa)
2188{
2189	/* Only bits 0-53 are used for address formation */
2190	if (!(gpa & ~0x3ff))
2191		return 0;
2192
2193	return write_guest_abs(vcpu, gpa & ~0x3ff,
2194			       (void *)&vcpu->run->s.regs.vrs, 512);
2195}
2196
2197int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2198{
2199	if (!test_kvm_facility(vcpu->kvm, 129))
2200		return 0;
2201
2202	/*
2203	 * The guest VXRS are in the host VXRS due to the lazy
2204	 * copying in vcpu load/put. Let's update our copies before we save
2205	 * them into the save area.
2206	 */
2207	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
2208
2209	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2210}
2211
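/*
 * The IBS facility speeds up a VCPU while it is the only one running.
 * The helpers below toggle it by queueing the corresponding request and
 * kicking the VCPU out of SIE so that the request gets processed.
 */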
2212static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2213{
2214	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2215	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
2216	exit_sie_sync(vcpu);
2217}
2218
2219static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2220{
2221	unsigned int i;
2222	struct kvm_vcpu *vcpu;
2223
2224	kvm_for_each_vcpu(i, vcpu, kvm) {
2225		__disable_ibs_on_vcpu(vcpu);
2226	}
2227}
2228
2229static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2230{
2231	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2232	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
2233	exit_sie_sync(vcpu);
2234}
2235
2236void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2237{
2238	int i, online_vcpus, started_vcpus = 0;
2239
2240	if (!is_vcpu_stopped(vcpu))
2241		return;
2242
2243	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2244	/* Only one cpu at a time may enter/leave the STOPPED state. */
2245	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2246	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2247
2248	for (i = 0; i < online_vcpus; i++) {
2249		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2250			started_vcpus++;
2251	}
2252
2253	if (started_vcpus == 0) {
2254		/* we're the only active VCPU -> speed it up */
2255		__enable_ibs_on_vcpu(vcpu);
2256	} else if (started_vcpus == 1) {
2257		/*
2258		 * As we are starting a second VCPU, we have to disable
2259		 * the IBS facility on all VCPUs to remove potentially
2260		 * outstanding ENABLE requests.
2261		 */
2262		__disable_ibs_on_all_vcpus(vcpu->kvm);
2263	}
2264
2265	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2266	/*
2267	 * Another VCPU might have used IBS while we were offline.
2268	 * Let's play safe and flush the VCPU at startup.
2269	 */
2270	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2271	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2272	return;
2273}
2274
2275void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2276{
2277	int i, online_vcpus, started_vcpus = 0;
2278	struct kvm_vcpu *started_vcpu = NULL;
2279
2280	if (is_vcpu_stopped(vcpu))
2281		return;
2282
2283	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2284	/* Only one cpu at a time may enter/leave the STOPPED state. */
2285	spin_lock(&vcpu->kvm->arch.start_stop_lock);
2286	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2287
2288	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2289	kvm_s390_clear_stop_irq(vcpu);
2290
2291	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2292	__disable_ibs_on_vcpu(vcpu);
2293
2294	for (i = 0; i < online_vcpus; i++) {
2295		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2296			started_vcpus++;
2297			started_vcpu = vcpu->kvm->vcpus[i];
2298		}
2299	}
2300
2301	if (started_vcpus == 1) {
2302		/*
2303		 * As we only have one VCPU left, we want to enable the
2304		 * IBS facility for that VCPU to speed it up.
2305		 */
2306		__enable_ibs_on_vcpu(started_vcpu);
2307	}
2308
2309	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2310	return;
2311}
2312
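/*
 * Enable a per-vcpu capability. Only KVM_CAP_S390_CSS_SUPPORT, which lets
 * userspace handle the channel subsystem, is supported here.
 */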
2313static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2314				     struct kvm_enable_cap *cap)
2315{
2316	int r;
2317
2318	if (cap->flags)
2319		return -EINVAL;
2320
2321	switch (cap->cap) {
2322	case KVM_CAP_S390_CSS_SUPPORT:
2323		if (!vcpu->kvm->arch.css_support) {
2324			vcpu->kvm->arch.css_support = 1;
2325			trace_kvm_s390_enable_css(vcpu->kvm);
2326		}
2327		r = 0;
2328		break;
2329	default:
2330		r = -EINVAL;
2331		break;
2332	}
2333	return r;
2334}
2335
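/*
 * Handle the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory through a temporary kernel buffer, or merely check accessibility
 * when KVM_S390_MEMOP_F_CHECK_ONLY is set. A positive return code denotes
 * a guest access exception, which is injected into the guest if
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION was requested.
 */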
2336static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2337				  struct kvm_s390_mem_op *mop)
2338{
2339	void __user *uaddr = (void __user *)mop->buf;
2340	void *tmpbuf = NULL;
2341	int r, srcu_idx;
2342	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2343				    | KVM_S390_MEMOP_F_CHECK_ONLY;
2344
2345	if (mop->flags & ~supported_flags)
2346		return -EINVAL;
2347
2348	if (mop->size > MEM_OP_MAX_SIZE)
2349		return -E2BIG;
2350
2351	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2352		tmpbuf = vmalloc(mop->size);
2353		if (!tmpbuf)
2354			return -ENOMEM;
2355	}
2356
2357	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2358
2359	switch (mop->op) {
2360	case KVM_S390_MEMOP_LOGICAL_READ:
2361		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2362			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2363			break;
2364		}
2365		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2366		if (r == 0) {
2367			if (copy_to_user(uaddr, tmpbuf, mop->size))
2368				r = -EFAULT;
2369		}
2370		break;
2371	case KVM_S390_MEMOP_LOGICAL_WRITE:
2372		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2373			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2374			break;
2375		}
2376		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2377			r = -EFAULT;
2378			break;
2379		}
2380		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2381		break;
2382	default:
2383		r = -EINVAL;
2384	}
2385
2386	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2387
2388	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2389		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2390
2391	vfree(tmpbuf);
2392	return r;
2393}
2394
2395long kvm_arch_vcpu_ioctl(struct file *filp,
2396			 unsigned int ioctl, unsigned long arg)
2397{
2398	struct kvm_vcpu *vcpu = filp->private_data;
2399	void __user *argp = (void __user *)arg;
2400	int idx;
2401	long r;
2402
2403	switch (ioctl) {
2404	case KVM_S390_IRQ: {
2405		struct kvm_s390_irq s390irq;
2406
2407		r = -EFAULT;
2408		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2409			break;
2410		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2411		break;
2412	}
2413	case KVM_S390_INTERRUPT: {
2414		struct kvm_s390_interrupt s390int;
2415		struct kvm_s390_irq s390irq;
2416
2417		r = -EFAULT;
2418		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2419			break;
2420		if (s390int_to_s390irq(&s390int, &s390irq))
2421			return -EINVAL;
2422		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2423		break;
2424	}
2425	case KVM_S390_STORE_STATUS:
2426		idx = srcu_read_lock(&vcpu->kvm->srcu);
2427		r = kvm_s390_vcpu_store_status(vcpu, arg);
2428		srcu_read_unlock(&vcpu->kvm->srcu, idx);
2429		break;
2430	case KVM_S390_SET_INITIAL_PSW: {
2431		psw_t psw;
2432
2433		r = -EFAULT;
2434		if (copy_from_user(&psw, argp, sizeof(psw)))
2435			break;
2436		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2437		break;
2438	}
2439	case KVM_S390_INITIAL_RESET:
2440		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2441		break;
2442	case KVM_SET_ONE_REG:
2443	case KVM_GET_ONE_REG: {
2444		struct kvm_one_reg reg;
2445		r = -EFAULT;
2446		if (copy_from_user(&reg, argp, sizeof(reg)))
2447			break;
2448		if (ioctl == KVM_SET_ONE_REG)
2449			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2450		else
2451			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2452		break;
2453	}
2454#ifdef CONFIG_KVM_S390_UCONTROL
2455	case KVM_S390_UCAS_MAP: {
2456		struct kvm_s390_ucas_mapping ucasmap;
2457
2458		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2459			r = -EFAULT;
2460			break;
2461		}
2462
2463		if (!kvm_is_ucontrol(vcpu->kvm)) {
2464			r = -EINVAL;
2465			break;
2466		}
2467
2468		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2469				     ucasmap.vcpu_addr, ucasmap.length);
2470		break;
2471	}
2472	case KVM_S390_UCAS_UNMAP: {
2473		struct kvm_s390_ucas_mapping ucasmap;
2474
2475		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2476			r = -EFAULT;
2477			break;
2478		}
2479
2480		if (!kvm_is_ucontrol(vcpu->kvm)) {
2481			r = -EINVAL;
2482			break;
2483		}
2484
2485		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2486			ucasmap.length);
2487		break;
2488	}
2489#endif
2490	case KVM_S390_VCPU_FAULT: {
2491		r = gmap_fault(vcpu->arch.gmap, arg, 0);
2492		break;
2493	}
2494	case KVM_ENABLE_CAP:
2495	{
2496		struct kvm_enable_cap cap;
2497		r = -EFAULT;
2498		if (copy_from_user(&cap, argp, sizeof(cap)))
2499			break;
2500		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2501		break;
2502	}
2503	case KVM_S390_MEM_OP: {
2504		struct kvm_s390_mem_op mem_op;
2505
2506		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2507			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2508		else
2509			r = -EFAULT;
2510		break;
2511	}
2512	case KVM_S390_SET_IRQ_STATE: {
2513		struct kvm_s390_irq_state irq_state;
2514
2515		r = -EFAULT;
2516		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2517			break;
2518		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2519		    irq_state.len == 0 ||
2520		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2521			r = -EINVAL;
2522			break;
2523		}
2524		r = kvm_s390_set_irq_state(vcpu,
2525					   (void __user *) irq_state.buf,
2526					   irq_state.len);
2527		break;
2528	}
2529	case KVM_S390_GET_IRQ_STATE: {
2530		struct kvm_s390_irq_state irq_state;
2531
2532		r = -EFAULT;
2533		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2534			break;
2535		if (irq_state.len == 0) {
2536			r = -EINVAL;
2537			break;
2538		}
2539		r = kvm_s390_get_irq_state(vcpu,
2540					   (__u8 __user *)  irq_state.buf,
2541					   irq_state.len);
2542		break;
2543	}
2544	default:
2545		r = -ENOTTY;
2546	}
2547	return r;
2548}
2549
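/*
 * Back the vcpu mmap at KVM_S390_SIE_PAGE_OFFSET with the SIE control
 * block for user-controlled VMs; any other access gets SIGBUS.
 */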
2550int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2551{
2552#ifdef CONFIG_KVM_S390_UCONTROL
2553	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2554		 && (kvm_is_ucontrol(vcpu->kvm))) {
2555		vmf->page = virt_to_page(vcpu->arch.sie_block);
2556		get_page(vmf->page);
2557		return 0;
2558	}
2559#endif
2560	return VM_FAULT_SIGBUS;
2561}
2562
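/* Nothing to allocate for s390 specific memslot data. */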
2563int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2564			    unsigned long npages)
2565{
2566	return 0;
2567}
2568
2569/* Section: memory related */
2570int kvm_arch_prepare_memory_region(struct kvm *kvm,
2571				   struct kvm_memory_slot *memslot,
2572				   struct kvm_userspace_memory_region *mem,
2573				   enum kvm_mr_change change)
2574{
2575	/* A few sanity checks. Memory slots have to start and end at a
2576	   segment boundary (1 MB). The memory in userland may be fragmented
2577	   into various different vmas. It is okay to mmap() and munmap()
2578	   parts of this slot at any time after this call. */
2579
2580	if (mem->userspace_addr & 0xffffful)
2581		return -EINVAL;
2582
2583	if (mem->memory_size & 0xffffful)
2584		return -EINVAL;
2585
2586	return 0;
2587}
2588
2589void kvm_arch_commit_memory_region(struct kvm *kvm,
2590				struct kvm_userspace_memory_region *mem,
2591				const struct kvm_memory_slot *old,
2592				enum kvm_mr_change change)
2593{
2594	int rc;
2595
2596	/* If the basics of the memslot do not change, we do not want
2597	 * to update the gmap. Every update causes several unnecessary
2598	 * segment translation exceptions. This is usually handled just
2599	 * fine by the normal fault handler + gmap, but it will also
2600	 * cause faults on the prefix page of running guest CPUs.
2601	 */
2602	if (old->userspace_addr == mem->userspace_addr &&
2603	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2604	    old->npages * PAGE_SIZE == mem->memory_size)
2605		return;
2606
2607	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2608		mem->guest_phys_addr, mem->memory_size);
2609	if (rc)
2610		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
2611	return;
2612}
2613
2614static int __init kvm_s390_init(void)
2615{
2616	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2617}
2618
2619static void __exit kvm_s390_exit(void)
2620{
2621	kvm_exit();
2622}
2623
2624module_init(kvm_s390_init);
2625module_exit(kvm_s390_exit);
2626
2627/*
2628 * Enable autoloading of the kvm module.
2629 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2630 * since x86 takes a different approach.
2631 */
2632#include <linux/miscdevice.h>
2633MODULE_ALIAS_MISCDEV(KVM_MINOR);
2634MODULE_ALIAS("devname:kvm");
2635