/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2000
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (weigand@de.ibm.com)
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "include/asm-i386/pgtable.h"
 */

#ifndef _ASM_S390_PGTABLE_H
#define _ASM_S390_PGTABLE_H

/*
 * The Linux memory management assumes a three-level page table setup.
 * For s390 64 bit we use up to four of the five levels the hardware
 * provides (region first tables are not used).
 *
 * The "pgd_xxx()" functions are trivial for a folded two-level
 * setup: the pgd is never bad, and a pmd always exists (as it's folded
 * into the pgd entry)
 *
 * This file contains the functions and defines necessary to modify and use
 * the S390 page table tree.
 */
#ifndef __ASSEMBLY__
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/radix-tree.h>
#include <asm/bug.h>
#include <asm/page.h>

extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
extern void paging_init(void);
extern void vmem_map_init(void);

/*
 * The S390 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information.
 */
#define update_mmu_cache(vma, address, ptep)     do { } while (0)
#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)

/*
 * ZERO_PAGE is a global shared page that is always zero; used
 * for zero-mapped memory areas etc..
 */

extern unsigned long empty_zero_page;
extern unsigned long zero_page_mask;

#define ZERO_PAGE(vaddr) \
	(virt_to_page((void *)(empty_zero_page + \
	 (((unsigned long)(vaddr)) & zero_page_mask))))
#define __HAVE_COLOR_ZERO_PAGE
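
/*
 * Editor's note (an assumption based on the macro above, not text from
 * the original header): zero_page_mask allows more than one physical
 * zero page to be provided; ZERO_PAGE(vaddr) then picks the one whose
 * byte offset within the zero-page block matches the low page-index
 * bits of vaddr, so differently "colored" virtual addresses can be
 * backed by different zero pages. With a single zero page the mask is 0
 * and ZERO_PAGE(vaddr) always resolves to empty_zero_page.
 */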

/* TODO: s390 cannot support io_remap_pfn_range... */
#endif /* !__ASSEMBLY__ */

/*
 * PMD_SHIFT determines the size of the area a second-level page
 * table can map
 * PGDIR_SHIFT determines what a third-level page table entry can map
 */
#define PMD_SHIFT	20
#define PUD_SHIFT	31
#define PGDIR_SHIFT	42

#define PMD_SIZE        (1UL << PMD_SHIFT)
#define PMD_MASK        (~(PMD_SIZE-1))
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE-1))
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))

/*
 * Entries per page directory level: the S390 is two-level, so
 * we don't really have any PMD directory physically.
 * For S390, segment-table entries are combined into one PGD,
 * which leads to 1024 ptes per pgd.
 */
#define PTRS_PER_PTE	256
#define PTRS_PER_PMD	2048
#define PTRS_PER_PUD	2048
#define PTRS_PER_PGD	2048

#define FIRST_USER_ADDRESS  0UL

#define pte_ERROR(e) \
	printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
#define pmd_ERROR(e) \
	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
#define pud_ERROR(e) \
	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
#define pgd_ERROR(e) \
	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))

#ifndef __ASSEMBLY__
/*
 * The vmalloc and module areas always occupy the topmost part of the
 * kernel mapping. We reserve 128GB (64 bit) for vmalloc and modules.
 * On 64 bit kernels a 2GB area at the top of the vmalloc area is set
 * aside for modules. This ensures that inter-module branches never need
 * trampolines, and in addition the placement within a 2GB frame is
 * branch prediction unit friendly.
 */
extern unsigned long VMALLOC_START;
extern unsigned long VMALLOC_END;
extern struct page *vmemmap;

#define VMEM_MAX_PHYS ((unsigned long) vmemmap)

extern unsigned long MODULES_VADDR;
extern unsigned long MODULES_END;
#define MODULES_VADDR	MODULES_VADDR
#define MODULES_END	MODULES_END
#define MODULES_LEN	(1UL << 31)

static inline int is_module_addr(void *addr)
{
	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
	if (addr < (void *)MODULES_VADDR)
		return 0;
	if (addr > (void *)MODULES_END)
		return 0;
	return 1;
}

/*
 * A 64 bit page table entry of S390 has the following format:
 * |			 PFRA			      |0IPC|  OS  |
 * 0000000000111111111122222222223333333333444444444455555555556666
 * 0123456789012345678901234567890123456789012345678901234567890123
 *
 * I Page-Invalid Bit:    Page is not available for address-translation
 * P Page-Protection Bit: Store access not possible for page
 * C Change-bit override: HW is not required to set change bit
 *
 * A 64 bit segment table entry of S390 has the following format:
 * |        P-table origin                              |      TT
 * 0000000000111111111122222222223333333333444444444455555555556666
 * 0123456789012345678901234567890123456789012345678901234567890123
 *
 * I Segment-Invalid Bit:    Segment is not available for address-translation
 * C Common-Segment Bit:     Segment is not private (PoP 3-30)
 * P Page-Protection Bit: Store access not possible for page
 * TT Type 00
 *
 * A 64 bit region table entry of S390 has the following format:
 * |        S-table origin                             |   TF  TTTL
 * 0000000000111111111122222222223333333333444444444455555555556666
 * 0123456789012345678901234567890123456789012345678901234567890123
 *
 * I Segment-Invalid Bit:    Segment is not available for address-translation
 * TT Type 01
 * TF
 * TL Table length
 *
 * The 64 bit region table origin of S390 has the following format:
 * |      region table origin                           |       DTTL
 * 0000000000111111111122222222223333333333444444444455555555556666
 * 0123456789012345678901234567890123456789012345678901234567890123
 *
 * X Space-Switch event:
 * G Segment-Invalid Bit:
 * P Private-Space Bit:
 * S Storage-Alteration:
 * R Real space
 * TL Table-Length:
 *
 * A storage key has the following format:
 * | ACC |F|R|C|0|
 *  0   3 4 5 6 7
 * ACC: access key
 * F  : fetch protection bit
 * R  : referenced bit
 * C  : changed bit
 */
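
/*
 * Worked example (editor's addition, not from the original comment):
 * a storage key byte of 0x96 = b'1001 0110' decodes, using the layout
 * above, to ACC = 9 (bits 0-3), F = 0, R = 1, C = 1. The R and C bits
 * of this byte are what pgste_update_all() below transfers into the
 * guest-referenced/guest-changed bits of the pgste.
 */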

/* Hardware bits in the page table entry */
#define _PAGE_PROTECT	0x200		/* HW read-only bit  */
#define _PAGE_INVALID	0x400		/* HW invalid bit    */
#define _PAGE_LARGE	0x800		/* Bit to mark a large pte */

/* Software bits in the page table entry */
#define _PAGE_PRESENT	0x001		/* SW pte present bit */
#define _PAGE_YOUNG	0x004		/* SW pte young bit */
#define _PAGE_DIRTY	0x008		/* SW pte dirty bit */
#define _PAGE_READ	0x010		/* SW pte read bit */
#define _PAGE_WRITE	0x020		/* SW pte write bit */
#define _PAGE_SPECIAL	0x040		/* SW associated with special page */
#define _PAGE_UNUSED	0x080		/* SW bit for pgste usage state */
#define __HAVE_ARCH_PTE_SPECIAL

/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
				 _PAGE_YOUNG)

/*
 * handle_pte_fault uses pte_present and pte_none to find out the pte type
 * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
 * distinguish present from not-present ptes. It is changed only with the page
 * table lock held.
 *
 * The following table gives the different possible bit combinations for
 * the pte hardware and software bits in the last 12 bits of a pte
 * (. unassigned bit, x don't care, t swap type):
 *
 *				842100000000
 *				000084210000
 *				000000008421
 *				.IR.uswrdy.p
 * empty			.10.00000000
 * swap				.11..ttttt.0
 * prot-none, clean, old	.11.xx0000.1
 * prot-none, clean, young	.11.xx0001.1
 * prot-none, dirty, old	.10.xx0010.1
 * prot-none, dirty, young	.10.xx0011.1
 * read-only, clean, old	.11.xx0100.1
 * read-only, clean, young	.01.xx0101.1
 * read-only, dirty, old	.11.xx0110.1
 * read-only, dirty, young	.01.xx0111.1
 * read-write, clean, old	.11.xx1100.1
 * read-write, clean, young	.01.xx1101.1
 * read-write, dirty, old	.10.xx1110.1
 * read-write, dirty, young	.00.xx1111.1
 * HW-bits: R read-only, I invalid
 * SW-bits: p present, y young, d dirty, r read, w write, s special,
 *	    u unused, l large
 *
 * pte_none    is true for the bit pattern .10.00000000, pte == 0x400
 * pte_swap    is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200
 * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
 */
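
/*
 * Worked example (editor's addition): a page mapped with PAGE_READ and
 * marked young by pte_mkyoung() carries
 *	_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | _PAGE_PROTECT = 0x215,
 * which is the "read-only, clean, young" pattern .01.xx0101.1 above:
 * pte_present() sees the 0x001 bit, pte_none() and pte_swap() are both
 * false, and the hardware sees a valid, write-protected page.
 */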

/* Bits in the segment/region table address-space-control-element */
#define _ASCE_ORIGIN		~0xfffUL/* segment table origin		    */
#define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
#define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
#define _ASCE_SPACE_SWITCH	0x40	/* space switch event		    */
#define _ASCE_REAL_SPACE	0x20	/* real space control		    */
#define _ASCE_TYPE_MASK		0x0c	/* asce table type mask		    */
#define _ASCE_TYPE_REGION1	0x0c	/* region first table type	    */
#define _ASCE_TYPE_REGION2	0x08	/* region second table type	    */
#define _ASCE_TYPE_REGION3	0x04	/* region third table type	    */
#define _ASCE_TYPE_SEGMENT	0x00	/* segment table type		    */
#define _ASCE_TABLE_LENGTH	0x03	/* region table length		    */

/* Bits in the region table entry */
#define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
#define _REGION_ENTRY_PROTECT	0x200	/* region protection bit	    */
#define _REGION_ENTRY_INVALID	0x20	/* invalid region table entry	    */
#define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
#define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
#define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type	    */
#define _REGION_ENTRY_TYPE_R3	0x04	/* region third table type	    */
#define _REGION_ENTRY_LENGTH	0x03	/* region third length		    */

#define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
#define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
#define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
#define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
#define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
#define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)

#define _REGION3_ENTRY_LARGE	0x400	/* RTTE-format control, large page  */
#define _REGION3_ENTRY_RO	0x200	/* page protection bit		    */

/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
#define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */

#define _SEGMENT_ENTRY		(0)
#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)

#define _SEGMENT_ENTRY_DIRTY	0x2000	/* SW segment dirty bit */
#define _SEGMENT_ENTRY_YOUNG	0x1000	/* SW segment young bit */
#define _SEGMENT_ENTRY_SPLIT	0x0800	/* THP splitting bit */
#define _SEGMENT_ENTRY_LARGE	0x0400	/* STE-format control, large page */
#define _SEGMENT_ENTRY_READ	0x0002	/* SW segment read bit */
#define _SEGMENT_ENTRY_WRITE	0x0001	/* SW segment write bit */

/*
 * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
 *				dy..R...I...wr
 * prot-none, clean, old	00..1...1...00
 * prot-none, clean, young	01..1...1...00
 * prot-none, dirty, old	10..1...1...00
 * prot-none, dirty, young	11..1...1...00
 * read-only, clean, old	00..1...1...01
 * read-only, clean, young	01..1...0...01
 * read-only, dirty, old	10..1...1...01
 * read-only, dirty, young	11..1...0...01
 * read-write, clean, old	00..1...1...11
 * read-write, clean, young	01..1...0...11
 * read-write, dirty, old	10..0...1...11
 * read-write, dirty, young	11..0...0...11
 * The segment table origin is used to distinguish empty (origin==0) from
 * read-write, old segment table entries (origin!=0)
 * HW-bits: R read-only, I invalid
 * SW-bits: y young, d dirty, r read, w write
 */
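
/*
 * Worked example (editor's addition): a writable, dirty, young THP
 * segment entry carries _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
 * _SEGMENT_ENTRY_READ | _SEGMENT_ENTRY_WRITE | _SEGMENT_ENTRY_LARGE =
 * 0x3403, matching the "read-write, dirty, young" row above with both
 * the protect and the invalid bit clear.
 */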

#define _SEGMENT_ENTRY_SPLIT_BIT 11	/* THP splitting bit number */

/* Page status table bits for virtualization */
#define PGSTE_ACC_BITS	0xf000000000000000UL
#define PGSTE_FP_BIT	0x0800000000000000UL
#define PGSTE_PCL_BIT	0x0080000000000000UL
#define PGSTE_HR_BIT	0x0040000000000000UL
#define PGSTE_HC_BIT	0x0020000000000000UL
#define PGSTE_GR_BIT	0x0004000000000000UL
#define PGSTE_GC_BIT	0x0002000000000000UL
#define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
#define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */

/* Guest Page State used for virtualization */
#define _PGSTE_GPS_ZERO		0x0000000080000000UL
#define _PGSTE_GPS_USAGE_MASK	0x0000000003000000UL
#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL

/*
 * A user page table pointer has the space-switch-event bit, the
 * private-space-control bit and the storage-alteration-event-control
 * bit set. A kernel page table pointer doesn't need them.
 */
#define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
				 _ASCE_ALT_EVENT)

/*
 * Page protection definitions.
 */
#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_INVALID)
#define PAGE_READ	__pgprot(_PAGE_PRESENT | _PAGE_READ | \
				 _PAGE_INVALID | _PAGE_PROTECT)
#define PAGE_WRITE	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
				 _PAGE_INVALID | _PAGE_PROTECT)

#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
				 _PAGE_YOUNG | _PAGE_DIRTY)
#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
				 _PAGE_YOUNG | _PAGE_DIRTY)
#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
				 _PAGE_PROTECT)

/*
 * On s390 the page table entry has an invalid bit and a read-only bit.
 * Read permission implies execute permission and write permission
 * implies read permission.
 */
         /*xwr*/
#define __P000	PAGE_NONE
#define __P001	PAGE_READ
#define __P010	PAGE_READ
#define __P011	PAGE_READ
#define __P100	PAGE_READ
#define __P101	PAGE_READ
#define __P110	PAGE_READ
#define __P111	PAGE_READ

#define __S000	PAGE_NONE
#define __S001	PAGE_READ
#define __S010	PAGE_WRITE
#define __S011	PAGE_WRITE
#define __S100	PAGE_READ
#define __S101	PAGE_READ
#define __S110	PAGE_WRITE
#define __S111	PAGE_WRITE

/*
 * Segment entry (large page) protection definitions.
 */
#define SEGMENT_NONE	__pgprot(_SEGMENT_ENTRY_INVALID | \
				 _SEGMENT_ENTRY_PROTECT)
#define SEGMENT_READ	__pgprot(_SEGMENT_ENTRY_PROTECT | \
				 _SEGMENT_ENTRY_READ)
#define SEGMENT_WRITE	__pgprot(_SEGMENT_ENTRY_READ | \
				 _SEGMENT_ENTRY_WRITE)

static inline int mm_has_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	if (unlikely(mm->context.has_pgste))
		return 1;
#endif
	return 0;
}

static inline int mm_alloc_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	if (unlikely(mm->context.alloc_pgste))
		return 1;
#endif
	return 0;
}

/*
 * In the case that a guest uses storage keys, faults should no longer
 * be backed by zero pages.
 */
#define mm_forbids_zeropage mm_use_skey
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	if (mm->context.use_skey)
		return 1;
#endif
	return 0;
}

/*
 * pgd/pmd/pte query functions
 */
static inline int pgd_present(pgd_t pgd)
{
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
		return 1;
	return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
}

static inline int pgd_none(pgd_t pgd)
{
	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
		return 0;
	return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
}

static inline int pgd_bad(pgd_t pgd)
{
	/*
	 * With dynamic page table levels the pgd can be a region table
	 * entry or a segment table entry. Check for the bits that are
	 * invalid for either table entry.
	 */
	unsigned long mask =
		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
	return (pgd_val(pgd) & mask) != 0;
}

static inline int pud_present(pud_t pud)
{
	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
		return 1;
	return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
}

static inline int pud_none(pud_t pud)
{
	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
		return 0;
	return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
}

static inline int pud_large(pud_t pud)
{
	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
		return 0;
	return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
}

static inline int pud_bad(pud_t pud)
{
	/*
	 * With dynamic page table levels the pud can be a region table
	 * entry or a segment table entry. Check for the bits that are
	 * invalid for either table entry.
	 */
	unsigned long mask =
		~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
		~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
	return (pud_val(pud) & mask) != 0;
}

static inline int pmd_present(pmd_t pmd)
{
	return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
}

static inline int pmd_none(pmd_t pmd)
{
	return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
}

static inline int pmd_large(pmd_t pmd)
{
	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
}

static inline unsigned long pmd_pfn(pmd_t pmd)
{
	unsigned long origin_mask;

	origin_mask = _SEGMENT_ENTRY_ORIGIN;
	if (pmd_large(pmd))
		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
	return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
}

static inline int pmd_bad(pmd_t pmd)
{
	if (pmd_large(pmd))
		return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}

#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
extern void pmdp_splitting_flush(struct vm_area_struct *vma,
				 unsigned long addr, pmd_t *pmdp);

#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);

#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
				  unsigned long address, pmd_t *pmdp);

#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
	return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
}

static inline int pmd_dirty(pmd_t pmd)
{
	int dirty = 1;
	if (pmd_large(pmd))
		dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
	return dirty;
}

static inline int pmd_young(pmd_t pmd)
{
	int young = 1;
	if (pmd_large(pmd))
		young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
	return young;
}

static inline int pte_present(pte_t pte)
{
	/* Bit pattern: (pte & 0x001) == 0x001 */
	return (pte_val(pte) & _PAGE_PRESENT) != 0;
}

static inline int pte_none(pte_t pte)
{
	/* Bit pattern: pte == 0x400 */
	return pte_val(pte) == _PAGE_INVALID;
}

static inline int pte_swap(pte_t pte)
{
	/* Bit pattern: (pte & 0x201) == 0x200 */
	return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
		== _PAGE_PROTECT;
}

static inline int pte_special(pte_t pte)
{
	return (pte_val(pte) & _PAGE_SPECIAL);
}

#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t a, pte_t b)
{
	return pte_val(a) == pte_val(b);
}

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	preempt_disable();
	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}
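
/*
 * Editor's sketch (not part of the original header) of what the inline
 * assembly above does, expressed as C-like pseudo code: the pgste lives
 * PTRS_PER_PTE entries behind the pte, and the PCL (lock) bit is taken
 * with a compare-and-swap loop that only succeeds while the bit is
 * still clear in memory:
 *
 *	do {
 *		old = READ(pgste) & ~PGSTE_PCL_BIT;	// expect unlocked
 *		new = old | PGSTE_PCL_BIT;		// take the lock
 *	} while (!compare_and_swap(&pgste, old, new));
 *	return new;					// locked value
 */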

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
	preempt_enable();
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
		return pgste;
	address = pte_val(*ptep) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
	*ptep = entry;
	return pgste;
}

/**
 * struct gmap - guest address space
 * @crst_list: list of all crst tables used in the guest address space
 * @mm: pointer to the parent mm_struct
 * @guest_to_host: radix tree with guest to host address translation
 * @host_to_guest: radix tree with pointer to segment table entries
 * @guest_table_lock: spinlock to protect all entries in the guest page table
 * @table: pointer to the page directory
 * @asce: address space control element for gmap page table
 * @pfault_enabled: defines if pfaults are applicable for the guest
 */
struct gmap {
	struct list_head list;
	struct list_head crst_list;
	struct mm_struct *mm;
	struct radix_tree_root guest_to_host;
	struct radix_tree_root host_to_guest;
	spinlock_t guest_table_lock;
	unsigned long *table;
	unsigned long asce;
	unsigned long asce_end;
	void *private;
	bool pfault_enabled;
};

/**
 * struct gmap_notifier - notify function block for page invalidation
 * @notifier_call: address of callback function
 */
struct gmap_notifier {
	struct list_head list;
	void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
};

struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
void gmap_free(struct gmap *gmap);
void gmap_enable(struct gmap *gmap);
void gmap_disable(struct gmap *gmap);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);

void gmap_register_ipte_notifier(struct gmap_notifier *);
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);

static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
					unsigned long addr,
					pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	if (pgste_val(pgste) & PGSTE_IN_BIT) {
		pgste_val(pgste) &= ~PGSTE_IN_BIT;
		gmap_do_ipte_notify(mm, addr, ptep);
	}
#endif
	return pgste;
}

/*
 * Certain architectures need to do special things when PTEs
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
		pgste_set_key(ptep, pgste, entry, mm);
		pgste = pgste_set_pte(ptep, pgste, entry);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = entry;
	}
}

/*
 * query functions pte_write/pte_dirty/pte_young only work if
 * pte_present() is true. Undefined behaviour if not..
 */
static inline int pte_write(pte_t pte)
{
	return (pte_val(pte) & _PAGE_WRITE) != 0;
}

static inline int pte_dirty(pte_t pte)
{
	return (pte_val(pte) & _PAGE_DIRTY) != 0;
}

static inline int pte_young(pte_t pte)
{
	return (pte_val(pte) & _PAGE_YOUNG) != 0;
}

#define __HAVE_ARCH_PTE_UNUSED
static inline int pte_unused(pte_t pte)
{
	return pte_val(pte) & _PAGE_UNUSED;
}

/*
 * pgd/pmd/pte modification functions
 */

static inline void pgd_clear(pgd_t *pgd)
{
	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
}

static inline void pud_clear(pud_t *pud)
{
	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pud_val(*pud) = _REGION3_ENTRY_EMPTY;
}

static inline void pmd_clear(pmd_t *pmdp)
{
	pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pte_val(*ptep) = _PAGE_INVALID;
}

/*
 * The following pte modification functions only work if
 * pte_present() is true. Undefined behaviour if not..
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	pte_val(pte) &= _PAGE_CHG_MASK;
	pte_val(pte) |= pgprot_val(newprot);
	/*
	 * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
	 * invalid bit set, clear it again for readable, young pages
	 */
	if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
		pte_val(pte) &= ~_PAGE_INVALID;
	/*
	 * newprot for PAGE_READ and PAGE_WRITE has the page protection
	 * bit set, clear it again for writable, dirty pages
	 */
	if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
		pte_val(pte) &= ~_PAGE_PROTECT;
	return pte;
}

static inline pte_t pte_wrprotect(pte_t pte)
{
	pte_val(pte) &= ~_PAGE_WRITE;
	pte_val(pte) |= _PAGE_PROTECT;
	return pte;
}

static inline pte_t pte_mkwrite(pte_t pte)
{
	pte_val(pte) |= _PAGE_WRITE;
	if (pte_val(pte) & _PAGE_DIRTY)
		pte_val(pte) &= ~_PAGE_PROTECT;
	return pte;
}

static inline pte_t pte_mkclean(pte_t pte)
{
	pte_val(pte) &= ~_PAGE_DIRTY;
	pte_val(pte) |= _PAGE_PROTECT;
	return pte;
}

static inline pte_t pte_mkdirty(pte_t pte)
{
	pte_val(pte) |= _PAGE_DIRTY;
	if (pte_val(pte) & _PAGE_WRITE)
		pte_val(pte) &= ~_PAGE_PROTECT;
	return pte;
}

static inline pte_t pte_mkold(pte_t pte)
{
	pte_val(pte) &= ~_PAGE_YOUNG;
	pte_val(pte) |= _PAGE_INVALID;
	return pte;
}

static inline pte_t pte_mkyoung(pte_t pte)
{
	pte_val(pte) |= _PAGE_YOUNG;
	if (pte_val(pte) & _PAGE_READ)
		pte_val(pte) &= ~_PAGE_INVALID;
	return pte;
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	pte_val(pte) |= _PAGE_SPECIAL;
	return pte;
}

#ifdef CONFIG_HUGETLB_PAGE
static inline pte_t pte_mkhuge(pte_t pte)
{
	pte_val(pte) |= _PAGE_LARGE;
	return pte;
}
#endif

static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
{
	unsigned long pto = (unsigned long) ptep;

	/* Invalidation + global TLB flush for the pte */
	asm volatile(
		"	ipte	%2,%3"
		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
}

static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
{
	unsigned long pto = (unsigned long) ptep;

	/* Invalidation + local TLB flush for the pte */
	asm volatile(
		"	.insn rrf,0xb2210000,%2,%3,0,1"
		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
}

static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
{
	unsigned long pto = (unsigned long) ptep;

	/* Invalidate a range of ptes + global TLB flush of the ptes */
	do {
		asm volatile(
			"	.insn rrf,0xb2210000,%2,%0,%1,0"
			: "+a" (address), "+a" (nr) : "a" (pto) : "memory");
	} while (nr != 255);
}

static inline void ptep_flush_direct(struct mm_struct *mm,
				     unsigned long address, pte_t *ptep)
{
	int active, count;

	if (pte_val(*ptep) & _PAGE_INVALID)
		return;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte_local(address, ptep);
	else
		__ptep_ipte(address, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
}

static inline void ptep_flush_lazy(struct mm_struct *mm,
				   unsigned long address, pte_t *ptep)
{
	int active, count;

	if (pte_val(*ptep) & _PAGE_INVALID)
		return;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		__ptep_ipte(address, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
}

/*
 * Get (and clear) the user dirty bit for a pte.
 */
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
						 unsigned long addr,
						 pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	int dirty;

	if (!mm_has_pgste(mm))
		return 0;
	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte, oldpte;
	int young;

	if (mm_has_pgste(vma->vm_mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
	}

	oldpte = pte = *ptep;
	ptep_flush_direct(vma->vm_mm, addr, ptep);
	young = pte_young(pte);
	pte = pte_mkold(pte);

	if (mm_has_pgste(vma->vm_mm)) {
		pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else
		*ptep = pte;

	return young;
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep)
{
	return ptep_test_and_clear_young(vma, address, ptep);
}

/*
 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
 * both clear the TLB for the unmapped pte. The reason is that
 * ptep_get_and_clear is used in common code (e.g. change_pte_range)
 * to modify an active pte. The sequence is
 *   1) ptep_get_and_clear
 *   2) set_pte_at
 *   3) flush_tlb_range
 * On s390 the tlb needs to get flushed with the modification of the pte
 * if the pte is active. The only way this can be implemented is to
 * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
 * is a nop.
 */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
	}

	pte = *ptep;
	ptep_flush_lazy(mm, address, ptep);
	pte_val(*ptep) = _PAGE_INVALID;

	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(&pte, pgste, mm);
		pgste_set_unlock(ptep, pgste);
	}
	return pte;
}
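
/*
 * Editor's illustration (not from the original header), assuming a
 * change_pte_range()-style caller in common code:
 *
 *	pte = ptep_get_and_clear(mm, addr, ptep);  // clears + flushes here
 *	pte = pte_modify(pte, newprot);
 *	set_pte_at(mm, addr, ptep, pte);
 *	...
 *	flush_tlb_range(vma, start, end);          // nop on s390
 *
 * which is why the TLB flush is folded into ptep_get_and_clear() above.
 */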

#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
					   unsigned long address,
					   pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste_ipte_notify(mm, address, ptep, pgste);
	}

	pte = *ptep;
	ptep_flush_lazy(mm, address, ptep);

	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(&pte, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return pte;
}

static inline void ptep_modify_prot_commit(struct mm_struct *mm,
					   unsigned long address,
					   pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else
		*ptep = pte;
}

#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
				     unsigned long address, pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;

	if (mm_has_pgste(vma->vm_mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
	}

	pte = *ptep;
	ptep_flush_direct(vma->vm_mm, address, ptep);
	pte_val(*ptep) = _PAGE_INVALID;

	if (mm_has_pgste(vma->vm_mm)) {
		if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
		    _PGSTE_GPS_USAGE_UNUSED)
			pte_val(pte) |= _PAGE_UNUSED;
		pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
		pgste_set_unlock(ptep, pgste);
	}
	return pte;
}

/*
 * The batched pte unmap code uses ptep_get_and_clear_full to clear the
 * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
 * tlbs of an mm if it can guarantee that the ptes of the mm_struct
 * cannot be accessed while the batched unmap is running. In this case
 * full==1 and a simple pte_clear is enough. See tlb.h.
 */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address,
					    pte_t *ptep, int full)
{
	pgste_t pgste;
	pte_t pte;

	if (!full && mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
	}

	pte = *ptep;
	if (!full)
		ptep_flush_lazy(mm, address, ptep);
	pte_val(*ptep) = _PAGE_INVALID;

	if (!full && mm_has_pgste(mm)) {
		pgste = pgste_update_all(&pte, pgste, mm);
		pgste_set_unlock(ptep, pgste);
	}
	return pte;
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte = *ptep;

	if (pte_write(pte)) {
		if (mm_has_pgste(mm)) {
			pgste = pgste_get_lock(ptep);
			pgste = pgste_ipte_notify(mm, address, ptep, pgste);
		}

		ptep_flush_lazy(mm, address, ptep);
		pte = pte_wrprotect(pte);

		if (mm_has_pgste(mm)) {
			pgste = pgste_set_pte(ptep, pgste, pte);
			pgste_set_unlock(ptep, pgste);
		} else
			*ptep = pte;
	}
	return pte;
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pte_t *ptep,
					pte_t entry, int dirty)
{
	pgste_t pgste;

	if (pte_same(*ptep, entry))
		return 0;
	if (mm_has_pgste(vma->vm_mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
	}

	ptep_flush_direct(vma->vm_mm, address, ptep);

	if (mm_has_pgste(vma->vm_mm)) {
		pgste_set_key(ptep, pgste, entry, vma->vm_mm);
		pgste = pgste_set_pte(ptep, pgste, entry);
		pgste_set_unlock(ptep, pgste);
	} else
		*ptep = entry;
	return 1;
}

/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */
static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
	pte_t __pte;
	pte_val(__pte) = physpage + pgprot_val(pgprot);
	return pte_mkyoung(__pte);
}

static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
{
	unsigned long physpage = page_to_phys(page);
	pte_t __pte = mk_pte_phys(physpage, pgprot);

	if (pte_write(__pte) && PageDirty(page))
		__pte = pte_mkdirty(__pte);
	return __pte;
}

#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))

#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
#define pgd_offset_k(address) pgd_offset(&init_mm, address)

#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)

static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
	pud_t *pud = (pud_t *) pgd;
	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
		pud = (pud_t *) pgd_deref(*pgd);
	return pud + pud_index(address);
}

static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
	pmd_t *pmd = (pmd_t *) pud;
	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pmd = (pmd_t *) pud_deref(*pud);
	return pmd + pmd_index(address);
}

#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pte_page(x) pfn_to_page(pte_pfn(x))

#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))

/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
{
	/*
	 * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
	 * Convert to segment table entry format.
	 */
	if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
		return pgprot_val(SEGMENT_NONE);
	if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
		return pgprot_val(SEGMENT_READ);
	return pgprot_val(SEGMENT_WRITE);
}

static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
	pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
	pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
	return pmd;
}

static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
	pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
	if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
		return pmd;
	pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
	return pmd;
}

static inline pmd_t pmd_mkclean(pmd_t pmd)
{
	if (pmd_large(pmd)) {
		pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
	}
	return pmd;
}

static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
	if (pmd_large(pmd)) {
		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
		if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
			pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
	}
	return pmd;
}

static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
	if (pmd_large(pmd)) {
		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
		if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
			pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
	}
	return pmd;
}

static inline pmd_t pmd_mkold(pmd_t pmd)
{
	if (pmd_large(pmd)) {
		pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
	}
	return pmd;
}

static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	if (pmd_large(pmd)) {
		pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
			_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
		pmd_val(pmd) |= massage_pgprot_pmd(newprot);
		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
			pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
			pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
		return pmd;
	}
	pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
	return pmd;
}

static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
	pmd_t __pmd;
	pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
	return __pmd;
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */

static inline void __pmdp_csp(pmd_t *pmdp)
{
	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
					       _SEGMENT_ENTRY_INVALID;
	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;

	asm volatile(
		"	csp %1,%3"
		: "=m" (*pmdp)
		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
}

static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
{
	unsigned long sto;

	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
	asm volatile(
		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
		: "=m" (*pmdp)
		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
		: "cc" );
}

static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
{
	unsigned long sto;

	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
	asm volatile(
		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
		: "=m" (*pmdp)
		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
		: "cc" );
}

static inline void pmdp_flush_direct(struct mm_struct *mm,
				     unsigned long address, pmd_t *pmdp)
{
	int active, count;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return;
	if (!MACHINE_HAS_IDTE) {
		__pmdp_csp(pmdp);
		return;
	}
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__pmdp_idte_local(address, pmdp);
	else
		__pmdp_idte(address, pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
}

static inline void pmdp_flush_lazy(struct mm_struct *mm,
				   unsigned long address, pmd_t *pmdp)
{
	int active, count;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else if (MACHINE_HAS_IDTE)
		__pmdp_idte(address, pmdp);
	else
		__pmdp_csp(pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable);

#define __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);

static inline int pmd_trans_splitting(pmd_t pmd)
{
	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) &&
		(pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT);
}

static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t entry)
{
	*pmdp = entry;
}

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
	pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
	pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
	return pmd;
}

#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address, pmd_t *pmdp)
{
	pmd_t pmd;

	pmd = *pmdp;
	pmdp_flush_direct(vma->vm_mm, address, pmdp);
	*pmdp = pmd_mkold(pmd);
	return pmd_young(pmd);
}

#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	pmdp_flush_direct(mm, address, pmdp);
	pmd_clear(pmdp);
	return pmd;
}

#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
					    unsigned long address,
					    pmd_t *pmdp, int full)
{
	pmd_t pmd = *pmdp;

	if (!full)
		pmdp_flush_lazy(mm, address, pmdp);
	pmd_clear(pmdp);
	return pmd;
}

#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
				     unsigned long address, pmd_t *pmdp)
{
	return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
}

#define __HAVE_ARCH_PMDP_INVALIDATE
static inline void pmdp_invalidate(struct vm_area_struct *vma,
				   unsigned long address, pmd_t *pmdp)
{
	pmdp_flush_direct(vma->vm_mm, address, pmdp);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	pmd_t pmd = *pmdp;

	if (pmd_write(pmd)) {
		pmdp_flush_direct(mm, address, pmdp);
		set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
	}
}

#define pfn_pmd(pfn, pgprot)	mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))

static inline int pmd_trans_huge(pmd_t pmd)
{
	return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
}

static inline int has_transparent_hugepage(void)
{
	return MACHINE_HAS_HPAGE ? 1 : 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * 64 bit swap entry format:
 * A page-table entry has some bits we have to treat in a special way.
 * Bits 52 and 55 have to be zero, otherwise a specification
 * exception will occur instead of a page translation exception. The
 * specification exception has the bad habit of not storing the
 * necessary information in the lowcore.
 * Bits 54 and 63 are used to indicate the page type.
 * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
 * This leaves the bits 0-51 and bits 56-62 to store type and offset.
 * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
 * for the offset.
 * |			  offset			|01100|type |00|
 * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
 * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
 */

#define __SWP_OFFSET_MASK	((1UL << 52) - 1)
#define __SWP_OFFSET_SHIFT	12
#define __SWP_TYPE_MASK		((1UL << 5) - 1)
#define __SWP_TYPE_SHIFT	2

static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
	pte_t pte;

	pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
	pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
	pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
	return pte;
}
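
/*
 * Worked example (editor's addition): mk_swap_pte(3, 5) yields
 * _PAGE_INVALID | _PAGE_PROTECT | (5 << 12) | (3 << 2) = 0x560c.
 * The result satisfies the swap test (pte & 0x201) == 0x200, is neither
 * pte_present() nor pte_none(), and keeps bits 52 and 55 zero as
 * required by the comment above.
 */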

static inline unsigned long __swp_type(swp_entry_t entry)
{
	return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
}

static inline unsigned long __swp_offset(swp_entry_t entry)
{
	return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
}

static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
{
	return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
}

#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x)	((pte_t) { (x).val })

#endif /* !__ASSEMBLY__ */

#define kern_addr_valid(addr)   (1)

extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);

/* s390 has a private copy of get unmapped area to deal with cache synonyms */
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN

/*
 * No page table caches to initialise
 */
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }

#include <asm-generic/pgtable.h>

#endif /* _ASM_S390_PGTABLE_H */