#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
};

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model, so if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, which would lead to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *mn,
			struct mm_struct *mm);

	/*
	 * clear_flush_young is called after the VM is
	 * test-and-clearing the young/accessed bitflag in the
	 * pte. This way the VM will provide proper aging to the
	 * accesses to the page through the secondary MMUs and not
	 * only to the ones through the Linux pte.
	 * Start-end is necessary in case the secondary MMU is mapping the page
	 * at a smaller granularity than the primary MMU.
	 */
	int (*clear_flush_young)(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long start,
				 unsigned long end);

	/*
	 * clear_young is a lightweight version of clear_flush_young. Like the
	 * latter, it is supposed to test-and-clear the young/accessed bitflag
	 * in the secondary pte, but it may omit flushing the secondary tlb.
	 */
	int (*clear_young)(struct mmu_notifier *mn,
			   struct mm_struct *mm,
			   unsigned long start,
			   unsigned long end);

	/*
	 * test_young is called to check the young/accessed bitflag in
	 * the secondary pte. This is used to know if the page is
	 * frequently used without actually clearing the flag or tearing
	 * down the secondary mapping on the page.
	 */
	int (*test_young)(struct mmu_notifier *mn,
			  struct mm_struct *mm,
			  unsigned long address);

	/*
	 * change_pte is called in cases where the pte mapping to a page
	 * is changed: for example, when ksm remaps the pte to point to a
	 * new shared page.
	 */
	void (*change_pte)(struct mmu_notifier *mn,
			   struct mm_struct *mm,
			   unsigned long address,
			   pte_t pte);

	/*
	 * Before this is invoked any secondary MMU is still ok to
	 * read/write to the page previously pointed to by the Linux
	 * pte because the page hasn't been freed yet and it won't be
	 * freed until this returns. If required, set_page_dirty() has
	 * to be called from within this method.
	 */
	void (*invalidate_page)(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_sem and/or the
	 * locks protecting the reverse maps are held. If the subsystem
	 * can't guarantee that no additional references are taken to
	 * the pages in the range, it has to implement the
	 * invalidate_range() notifier to remove any references taken
	 * after invalidate_range_start().
	 *
	 * Invalidation of multiple concurrent ranges may be
	 * optionally permitted by the driver. Either way the
	 * establishment of sptes is forbidden in the range passed to
	 * invalidate_range_start/end for the whole duration of the
	 * invalidate_range_start/end critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is decreased on
	 * invalidate_range_start() then the VM can free pages as page
	 * table entries are removed. If the refcount is only
	 * dropped on invalidate_range_end() then the driver itself
	 * will drop the last refcount but it must take care to flush
	 * any secondary tlb before doing the final free on the
	 * page. Pages will no longer be referenced by the linux
	 * address space but may still be referenced by sptes until
	 * the last refcount is dropped.
	 */
	void (*invalidate_range_start)(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end);
	void (*invalidate_range_end)(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start, unsigned long end);

	/*
	 * invalidate_range() is either called between
	 * invalidate_range_start() and invalidate_range_end() when the
	 * VM has to free pages that were unmapped, but before the
	 * pages are actually freed, or outside of _start()/_end() when
	 * a (remote) TLB flush is necessary.
	 *
	 * If invalidate_range() is used to manage a non-CPU TLB with
	 * shared page-tables, it is not necessary to implement the
	 * invalidate_range_start()/end() notifiers, as
	 * invalidate_range() already catches the points in time when an
	 * external TLB range needs to be flushed.
	 *
	 * The invalidate_range() function is called under the ptl
	 * spin-lock and is not allowed to sleep.
	 *
	 * Note that this function might be called with just a sub-range
	 * of what was passed to invalidate_range_start()/end(), if
	 * called between those functions.
	 */
	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
				 unsigned long start, unsigned long end);
};

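/*
 * Example (illustrative sketch only, not part of this header): a hypothetical
 * driver that mirrors CPU page tables into a device MMU might embed a
 * struct mmu_notifier in its per-mm context and implement a subset of the
 * callbacks above.  struct my_dev_mmu, my_dev_unmap_range() and
 * my_dev_flush_tlb() are invented names used purely for illustration.
 *
 *	struct my_dev_mmu {
 *		struct mmu_notifier	mn;
 *		struct my_dev		*dev;
 *	};
 *
 *	static void my_dev_mmu_release(struct mmu_notifier *mn,
 *				       struct mm_struct *mm)
 *	{
 *		struct my_dev_mmu *dmmu = container_of(mn, struct my_dev_mmu, mn);
 *
 *		my_dev_unmap_range(dmmu, 0, ULONG_MAX);
 *		my_dev_flush_tlb(dmmu);
 *	}
 *
 *	static void my_dev_mmu_invalidate_range_start(struct mmu_notifier *mn,
 *						      struct mm_struct *mm,
 *						      unsigned long start,
 *						      unsigned long end)
 *	{
 *		struct my_dev_mmu *dmmu = container_of(mn, struct my_dev_mmu, mn);
 *
 *		my_dev_unmap_range(dmmu, start, end);
 *		my_dev_flush_tlb(dmmu);
 *	}
 *
 *	static const struct mmu_notifier_ops my_dev_mmu_ops = {
 *		.release		= my_dev_mmu_release,
 *		.invalidate_range_start	= my_dev_mmu_invalidate_range_start,
 *	};
 *
 * In such a scheme the driver holds no extra page refcounts, so it must also
 * block the creation of new device mappings in [start, end) until the
 * matching invalidate_range_end() (not shown), as required by the comment on
 * invalidate_range_start()/end() above.
 */
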
/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
				    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
					       struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end);
extern int __mmu_notifier_clear_young(struct mm_struct *mm,
				      unsigned long start,
				      unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
				     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
				      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
					   unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end);

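/*
 * Example (illustrative sketch only): attaching and detaching the
 * hypothetical notifier from the example above.  mmu_notifier_register()
 * may sleep and takes mmap_sem itself; callers that already hold mmap_sem
 * for write use __mmu_notifier_register() instead.  my_dev_mmu_attach() and
 * my_dev_mmu_detach() are invented helpers.
 *
 *	static int my_dev_mmu_attach(struct my_dev_mmu *dmmu)
 *	{
 *		dmmu->mn.ops = &my_dev_mmu_ops;
 *		return mmu_notifier_register(&dmmu->mn, current->mm);
 *	}
 *
 *	static void my_dev_mmu_detach(struct my_dev_mmu *dmmu,
 *				      struct mm_struct *mm)
 *	{
 *		mmu_notifier_unregister(&dmmu->mn, mm);
 *	}
 *
 * Per the unregister documentation in mm/mmu_notifier.c, once
 * mmu_notifier_unregister() returns no notifier method can run anymore on
 * this mmu_notifier, so the embedding structure can then be freed.
 */
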
static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_clear_young(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_test_young(mm, address);
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range(mm, start, end);
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
	mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PMD_SIZE);	\
	__young;							\
})

#define ptep_clear_young_notify(__vma, __address, __ptep)		\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
					    ___address + PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_young_notify(__vma, __address, __pmdp)		\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
					    ___address + PMD_SIZE);	\
	__young;							\
})

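/*
 * Example (illustrative sketch only): page-aging code uses the *_young_notify()
 * wrappers so that accessed bits set by a secondary MMU are folded into the
 * primary MMU's aging decision.  Obtaining vma, address, ptep and page under
 * the page table lock is omitted; mark_page_referenced_here() is a placeholder.
 *
 *	int young;
 *
 *	young = ptep_clear_flush_young_notify(vma, address, ptep);
 *	if (young)
 *		mark_page_referenced_here(page);
 *
 * The wrapper test-and-clears (and flushes) the young bit in the Linux pte,
 * then ORs in the result of mmu_notifier_clear_flush_young() for the same
 * PAGE_SIZE range, so the page counts as referenced if either the CPU or a
 * secondary MMU touched it.
 */
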
#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
({									\
	unsigned long ___addr = __address & PAGE_MASK;			\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pte_t ___pte;							\
									\
	___pte = ptep_clear_flush(__vma, __address, __ptep);		\
	mmu_notifier_invalidate_range(___mm, ___addr,			\
				      ___addr + PAGE_SIZE);		\
									\
	___pte;								\
})

#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)		\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pmd_t ___pmd;							\
									\
	___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);		\
	mmu_notifier_invalidate_range(___mm, ___haddr,			\
				      ___haddr + HPAGE_PMD_SIZE);	\
									\
	___pmd;								\
})

#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
	pmd_t ___pmd;							\
									\
	___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);	\
	mmu_notifier_invalidate_range(__mm, ___haddr,			\
				      ___haddr + HPAGE_PMD_SIZE);	\
									\
	___pmd;								\
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to update the secondary MMUs first because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush()
 * before set_pte_at_notify() is invoked.  Updating the secondary MMUs
 * first is required when we change both the protection of the mapping
 * from read-only to read-write and the pfn (as during copy-on-write page
 * faults).  Otherwise the old page would remain mapped read-only in the
 * secondary MMUs after the new page is already writable by some CPU
 * through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
({									\
	struct mm_struct *___mm = __mm;					\
	unsigned long ___address = __address;				\
	pte_t ___pte = __pte;						\
									\
	mmu_notifier_change_pte(___mm, ___address, ___pte);		\
	set_pte_at(___mm, ___address, __ptep, ___pte);			\
})

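/*
 * Example (illustrative sketch only): the ordering the comment above requires,
 * roughly as a copy-on-write fault handler would use it.  Locking, error
 * handling and the actual page copy are omitted; new_pte stands for the
 * writable pte pointing at the freshly copied page.
 *
 * 1. Clear and flush the primary pte, notifying secondary MMUs that the old
 *    read-only mapping is going away:
 *
 *	orig_pte = ptep_clear_flush_notify(vma, address, ptep);
 *
 * 2. Update the secondary MMUs and only then install the new primary pte:
 *
 *	set_pte_at_notify(mm, address, ptep, new_pte);
 *
 * Because step 2 runs mmu_notifier_change_pte() before set_pte_at(), no CPU
 * can write through the new primary pte while a secondary MMU still points
 * at the old read-only page.
 */
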
extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
				   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */