root/mm/sparse.c

DEFINITIONS

This source file includes the following definitions.
  1. page_to_nid
  2. set_section_nid
  3. set_section_nid
  4. sparse_index_alloc
  5. sparse_index_init
  6. sparse_index_init
  7. __section_nr
  8. __section_nr
  9. sparse_encode_early_nid
  10. sparse_early_nid
  11. mminit_validate_memmodel_limits
  12. section_mark_present
  13. next_present_section_nr
  14. first_present_section_nr
  15. subsection_mask_set
  16. subsection_map_init
  17. memory_present
  18. memblocks_present
  19. sparse_encode_mem_map
  20. sparse_decode_mem_map
  21. sparse_init_one_section
  22. usemap_size
  23. mem_section_usage_size
  24. sparse_early_usemaps_alloc_pgdat_section
  25. check_usemap_section_nr
  26. sparse_early_usemaps_alloc_pgdat_section
  27. check_usemap_section_nr
  28. section_map_size
  29. section_map_size
  30. __populate_section_memmap
  31. sparse_buffer_free
  32. sparse_buffer_init
  33. sparse_buffer_fini
  34. sparse_buffer_alloc
  35. vmemmap_populate_print_last
  36. sparse_init_nid
  37. sparse_init
  38. online_mem_sections
  39. offline_mem_sections
  40. populate_section_memmap
  41. depopulate_section_memmap
  42. free_map_bootmem
  43. populate_section_memmap
  44. depopulate_section_memmap
  45. free_map_bootmem
  46. section_deactivate
  47. section_activate
  48. sparse_add_section
  49. clear_hwpoisoned_pages
  50. clear_hwpoisoned_pages
  51. sparse_remove_section

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * sparse memory mappings.
   4  */
   5 #include <linux/mm.h>
   6 #include <linux/slab.h>
   7 #include <linux/mmzone.h>
   8 #include <linux/memblock.h>
   9 #include <linux/compiler.h>
  10 #include <linux/highmem.h>
  11 #include <linux/export.h>
  12 #include <linux/spinlock.h>
  13 #include <linux/vmalloc.h>
  14 #include <linux/swap.h>
  15 #include <linux/swapops.h>
  16 
  17 #include "internal.h"
  18 #include <asm/dma.h>
  19 #include <asm/pgalloc.h>
  20 #include <asm/pgtable.h>
  21 
  22 /*
  23  * Permanent SPARSEMEM data:
  24  *
  25  * 1) mem_section       - memory sections, mem_map's for valid memory
  26  */
  27 #ifdef CONFIG_SPARSEMEM_EXTREME
  28 struct mem_section **mem_section;
  29 #else
  30 struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
  31         ____cacheline_internodealigned_in_smp;
  32 #endif
  33 EXPORT_SYMBOL(mem_section);
  34 
  35 #ifdef NODE_NOT_IN_PAGE_FLAGS
  36 /*
  37  * If we did not store the node number in the page then we have to
  38  * do a lookup in the section_to_node_table in order to find which
  39  * node the page belongs to.
  40  */
  41 #if MAX_NUMNODES <= 256
  42 static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  43 #else
  44 static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
  45 #endif
  46 
  47 int page_to_nid(const struct page *page)
  48 {
  49         return section_to_node_table[page_to_section(page)];
  50 }
  51 EXPORT_SYMBOL(page_to_nid);
  52 
  53 static void set_section_nid(unsigned long section_nr, int nid)
  54 {
  55         section_to_node_table[section_nr] = nid;
  56 }
  57 #else /* !NODE_NOT_IN_PAGE_FLAGS */
  58 static inline void set_section_nid(unsigned long section_nr, int nid)
  59 {
  60 }
  61 #endif
  62 
  63 #ifdef CONFIG_SPARSEMEM_EXTREME
  64 static noinline struct mem_section __ref *sparse_index_alloc(int nid)
  65 {
  66         struct mem_section *section = NULL;
  67         unsigned long array_size = SECTIONS_PER_ROOT *
  68                                    sizeof(struct mem_section);
  69 
  70         if (slab_is_available()) {
  71                 section = kzalloc_node(array_size, GFP_KERNEL, nid);
  72         } else {
  73                 section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
  74                                               nid);
  75                 if (!section)
  76                         panic("%s: Failed to allocate %lu bytes nid=%d\n",
  77                               __func__, array_size, nid);
  78         }
  79 
  80         return section;
  81 }
  82 
  83 static int __meminit sparse_index_init(unsigned long section_nr, int nid)
  84 {
  85         unsigned long root = SECTION_NR_TO_ROOT(section_nr);
  86         struct mem_section *section;
  87 
  88         /*
  89          * An existing section is possible in the sub-section hotplug
  90          * case. First hot-add instantiates, follow-on hot-add reuses
  91          * the existing section.
  92          *
  93          * The mem_hotplug_lock resolves the apparent race below.
  94          */
  95         if (mem_section[root])
  96                 return 0;
  97 
  98         section = sparse_index_alloc(nid);
  99         if (!section)
 100                 return -ENOMEM;
 101 
 102         mem_section[root] = section;
 103 
 104         return 0;
 105 }
 106 #else /* !SPARSEMEM_EXTREME */
 107 static inline int sparse_index_init(unsigned long section_nr, int nid)
 108 {
 109         return 0;
 110 }
 111 #endif
 112 
 113 #ifdef CONFIG_SPARSEMEM_EXTREME
 114 unsigned long __section_nr(struct mem_section *ms)
 115 {
 116         unsigned long root_nr;
 117         struct mem_section *root = NULL;
 118 
 119         for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
 120                 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
 121                 if (!root)
 122                         continue;
 123 
 124                 if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
  125                         break;
 126         }
 127 
 128         VM_BUG_ON(!root);
 129 
 130         return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 131 }
 132 #else
 133 unsigned long __section_nr(struct mem_section *ms)
 134 {
 135         return (unsigned long)(ms - mem_section[0]);
 136 }
 137 #endif
 138 
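/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * with SPARSEMEM_EXTREME the section table is two-level -- an array of
 * NR_SECTION_ROOTS pointers, each to a block of SECTIONS_PER_ROOT
 * entries -- and __section_nr() inverts __nr_to_section() by scanning
 * the roots.  The MODEL_* sizes and the int payload are assumptions
 * made for brevity.
 */
#include <stdio.h>
#include <stdlib.h>

#define MODEL_SECTIONS_PER_ROOT 4UL
#define MODEL_NR_ROOTS          8UL

static int *model_root[MODEL_NR_ROOTS];

static int *model_nr_to_section(unsigned long nr)
{
        unsigned long root = nr / MODEL_SECTIONS_PER_ROOT;

        return model_root[root] ?
                &model_root[root][nr % MODEL_SECTIONS_PER_ROOT] : NULL;
}

static unsigned long model_section_nr(int *ms)
{
        unsigned long root_nr;

        for (root_nr = 0; root_nr < MODEL_NR_ROOTS; root_nr++) {
                int *root = model_root[root_nr];

                if (root && ms >= root && ms < root + MODEL_SECTIONS_PER_ROOT)
                        return root_nr * MODEL_SECTIONS_PER_ROOT + (ms - root);
        }
        return -1UL;
}

int main(void)
{
        model_root[2] = calloc(MODEL_SECTIONS_PER_ROOT, sizeof(int));
        printf("%lu\n", model_section_nr(model_nr_to_section(9)));  /* 9 */
        return 0;
}
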
 139 /*
 140  * During early boot, before section_mem_map is used for an actual
 141  * mem_map, we use section_mem_map to store the section's NUMA
 142  * node.  This keeps us from having to use another data structure.  The
 143  * node information is cleared just before we store the real mem_map.
 144  */
 145 static inline unsigned long sparse_encode_early_nid(int nid)
 146 {
 147         return (nid << SECTION_NID_SHIFT);
 148 }
 149 
 150 static inline int sparse_early_nid(struct mem_section *section)
 151 {
 152         return (section->section_mem_map >> SECTION_NID_SHIFT);
 153 }
 154 
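/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * until a section gets a real mem_map, its NUMA node is parked in the
 * upper bits of ->section_mem_map while the SECTION_* flags live in
 * the low bits, so a plain right shift recovers the node.  The shift
 * and flag values below are stand-ins, not the kernel's definitions.
 */
#include <stdio.h>

#define MODEL_NID_SHIFT        6
#define MODEL_MARKED_PRESENT   (1UL << 0)

int main(void)
{
        unsigned long section_mem_map;
        int nid = 3;

        /* memory_present() stores the node ... */
        section_mem_map = (unsigned long)nid << MODEL_NID_SHIFT;
        /* ... and section_mark_present() sets a low flag bit */
        section_mem_map |= MODEL_MARKED_PRESENT;

        /* sparse_early_nid(): the flag bits fall away in the shift */
        printf("nid = %d\n", (int)(section_mem_map >> MODEL_NID_SHIFT));
        return 0;
}
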
 155 /* Validate the physical addressing limitations of the model */
 156 void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
 157                                                 unsigned long *end_pfn)
 158 {
 159         unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
 160 
 161         /*
 162          * Sanity checks - do not allow an architecture to pass
 163          * in larger pfns than the maximum scope of sparsemem:
 164          */
 165         if (*start_pfn > max_sparsemem_pfn) {
 166                 mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
 167                         "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
 168                         *start_pfn, *end_pfn, max_sparsemem_pfn);
 169                 WARN_ON_ONCE(1);
 170                 *start_pfn = max_sparsemem_pfn;
 171                 *end_pfn = max_sparsemem_pfn;
 172         } else if (*end_pfn > max_sparsemem_pfn) {
 173                 mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
 174                         "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
 175                         *start_pfn, *end_pfn, max_sparsemem_pfn);
 176                 WARN_ON_ONCE(1);
 177                 *end_pfn = max_sparsemem_pfn;
 178         }
 179 }
 180 
 181 /*
 182  * There are a number of times that we loop over NR_MEM_SECTIONS,
 183  * looking for section_present() on each.  But, when we have very
 184  * large physical address spaces, NR_MEM_SECTIONS can also be
  185  * very large, which makes the loops quite long.
 186  *
 187  * Keeping track of this gives us an easy way to break out of
 188  * those loops early.
 189  */
 190 unsigned long __highest_present_section_nr;
 191 static void section_mark_present(struct mem_section *ms)
 192 {
 193         unsigned long section_nr = __section_nr(ms);
 194 
 195         if (section_nr > __highest_present_section_nr)
 196                 __highest_present_section_nr = section_nr;
 197 
 198         ms->section_mem_map |= SECTION_MARKED_PRESENT;
 199 }
 200 
 201 static inline unsigned long next_present_section_nr(unsigned long section_nr)
 202 {
 203         do {
 204                 section_nr++;
 205                 if (present_section_nr(section_nr))
 206                         return section_nr;
 207         } while ((section_nr <= __highest_present_section_nr));
 208 
 209         return -1;
 210 }
 211 #define for_each_present_section_nr(start, section_nr)          \
 212         for (section_nr = next_present_section_nr(start-1);     \
 213              ((section_nr != -1) &&                             \
 214               (section_nr <= __highest_present_section_nr));    \
 215              section_nr = next_present_section_nr(section_nr))
 216 
 217 static inline unsigned long first_present_section_nr(void)
 218 {
 219         return next_present_section_nr(-1);
 220 }
 221 
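/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * the point of __highest_present_section_nr is that walks such as
 * for_each_present_section_nr() stop right after the last section ever
 * marked present instead of scanning all NR_MEM_SECTIONS entries.  The
 * MODEL_* size is an arbitrary stand-in.
 */
#include <stdbool.h>
#include <stdio.h>

#define MODEL_NR_SECTIONS 1000000UL

static bool model_present[MODEL_NR_SECTIONS];
static unsigned long model_highest_present;

static unsigned long model_next_present(unsigned long nr)
{
        do {
                nr++;
                if (nr < MODEL_NR_SECTIONS && model_present[nr])
                        return nr;
        } while (nr <= model_highest_present);

        return -1UL;
}

int main(void)
{
        unsigned long nr;

        model_present[3] = model_present[7] = true;
        model_highest_present = 7;

        /* visits 3 and 7, then gives up at 8 rather than at 1000000 */
        for (nr = model_next_present(-1UL); nr != -1UL;
             nr = model_next_present(nr))
                printf("present section %lu\n", nr);
        return 0;
}
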
 222 static void subsection_mask_set(unsigned long *map, unsigned long pfn,
 223                 unsigned long nr_pages)
 224 {
 225         int idx = subsection_map_index(pfn);
 226         int end = subsection_map_index(pfn + nr_pages - 1);
 227 
 228         bitmap_set(map, idx, end - idx + 1);
 229 }
 230 
 231 void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
 232 {
 233         int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
 234         unsigned long nr, start_sec = pfn_to_section_nr(pfn);
 235 
 236         if (!nr_pages)
 237                 return;
 238 
 239         for (nr = start_sec; nr <= end_sec; nr++) {
 240                 struct mem_section *ms;
 241                 unsigned long pfns;
 242 
 243                 pfns = min(nr_pages, PAGES_PER_SECTION
 244                                 - (pfn & ~PAGE_SECTION_MASK));
 245                 ms = __nr_to_section(nr);
 246                 subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
 247 
 248                 pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
 249                                 pfns, subsection_map_index(pfn),
 250                                 subsection_map_index(pfn + pfns - 1));
 251 
 252                 pfn += pfns;
 253                 nr_pages -= pfns;
 254         }
 255 }
 256 
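/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * how a (pfn, nr_pages) range turns into a run of subsection bits.
 * The geometry assumed here -- 32768 pages per 128 MiB section split
 * into 64 subsections of 512 pages (2 MiB) -- is the usual x86_64
 * layout, not something this file defines.
 */
#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGES_PER_SECTION    32768UL
#define MODEL_PAGES_PER_SUBSECTION 512UL

static int model_subsection_index(unsigned long pfn)
{
        return (pfn % MODEL_PAGES_PER_SECTION) / MODEL_PAGES_PER_SUBSECTION;
}

int main(void)
{
        /* 4 MiB (1024 pages) starting at pfn 0x8200, inside section 1 */
        unsigned long pfn = 0x8200, nr_pages = 1024;
        int idx = model_subsection_index(pfn);
        int end = model_subsection_index(pfn + nr_pages - 1);
        uint64_t map = 0;
        int i;

        for (i = idx; i <= end; i++)    /* bitmap_set(map, idx, end - idx + 1) */
                map |= UINT64_C(1) << i;

        printf("bits %d..%d -> map %#llx\n", idx, end,
               (unsigned long long)map);       /* bits 1..2 -> map 0x6 */
        return 0;
}
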
 257 /* Record a memory area against a node. */
 258 void __init memory_present(int nid, unsigned long start, unsigned long end)
 259 {
 260         unsigned long pfn;
 261 
 262 #ifdef CONFIG_SPARSEMEM_EXTREME
 263         if (unlikely(!mem_section)) {
 264                 unsigned long size, align;
 265 
 266                 size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
 267                 align = 1 << (INTERNODE_CACHE_SHIFT);
 268                 mem_section = memblock_alloc(size, align);
 269                 if (!mem_section)
 270                         panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 271                               __func__, size, align);
 272         }
 273 #endif
 274 
 275         start &= PAGE_SECTION_MASK;
 276         mminit_validate_memmodel_limits(&start, &end);
 277         for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 278                 unsigned long section = pfn_to_section_nr(pfn);
 279                 struct mem_section *ms;
 280 
 281                 sparse_index_init(section, nid);
 282                 set_section_nid(section, nid);
 283 
 284                 ms = __nr_to_section(section);
 285                 if (!ms->section_mem_map) {
 286                         ms->section_mem_map = sparse_encode_early_nid(nid) |
 287                                                         SECTION_IS_ONLINE;
 288                         section_mark_present(ms);
 289                 }
 290         }
 291 }
 292 
 293 /*
 294  * Mark all memblocks as present using memory_present(). This is a
  295  * convenience function that is useful for a number of arches
  296  * to mark all of the system's memory as present during initialization.
 297  */
 298 void __init memblocks_present(void)
 299 {
 300         struct memblock_region *reg;
 301 
 302         for_each_memblock(memory, reg) {
 303                 memory_present(memblock_get_region_node(reg),
 304                                memblock_region_memory_base_pfn(reg),
 305                                memblock_region_memory_end_pfn(reg));
 306         }
 307 }
 308 
 309 /*
  310  * Subtle: we store the mem_map pointer biased by the section's first
  311  * pfn, so that the identity "page - section_mem_map" returns a page's
  312  * actual physical page frame number.
 313  */
 314 static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
 315 {
 316         unsigned long coded_mem_map =
 317                 (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
 318         BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
 319         BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
 320         return coded_mem_map;
 321 }
 322 
 323 /*
 324  * Decode mem_map from the coded memmap
 325  */
 326 struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
 327 {
 328         /* mask off the extra low bits of information */
 329         coded_mem_map &= SECTION_MAP_MASK;
 330         return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
 331 }
 332 
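/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * the stored value is the mem_map pointer biased downwards by the
 * section's first pfn, so that "coded + pfn" lands on the right struct
 * page for any pfn in the section, which is exactly the identity
 * pfn_to_page() relies on.  The struct, section size and pfn below are
 * made up; like the kernel, the model briefly forms an out-of-range
 * pointer that is only meaningful once a valid pfn is added back.
 */
#include <stdio.h>

struct model_page { unsigned long flags; };

#define MODEL_PAGES_PER_SECTION 8UL

static struct model_page model_memmap[MODEL_PAGES_PER_SECTION];

int main(void)
{
        unsigned long section_start_pfn = 4096;         /* assumed */

        /* sparse_encode_mem_map(): bias the pointer by the start pfn */
        struct model_page *coded = model_memmap - section_start_pfn;

        /* pfn_to_page(): no per-section start pfn needed any more */
        unsigned long pfn = section_start_pfn + 5;
        struct model_page *page = coded + pfn;

        printf("match: %d\n", page == &model_memmap[5]);        /* 1 */
        return 0;
}
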
 333 static void __meminit sparse_init_one_section(struct mem_section *ms,
 334                 unsigned long pnum, struct page *mem_map,
 335                 struct mem_section_usage *usage, unsigned long flags)
 336 {
 337         ms->section_mem_map &= ~SECTION_MAP_MASK;
 338         ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
 339                 | SECTION_HAS_MEM_MAP | flags;
 340         ms->usage = usage;
 341 }
 342 
 343 static unsigned long usemap_size(void)
 344 {
 345         return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
 346 }
 347 
 348 size_t mem_section_usage_size(void)
 349 {
 350         return sizeof(struct mem_section_usage) + usemap_size();
 351 }
 352 
 353 #ifdef CONFIG_MEMORY_HOTREMOVE
 354 static struct mem_section_usage * __init
 355 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 356                                          unsigned long size)
 357 {
 358         struct mem_section_usage *usage;
 359         unsigned long goal, limit;
 360         int nid;
 361         /*
  362          * A page may contain usemaps for other sections, preventing the
  363          * page from being freed and making a section unremovable while
 364          * other sections referencing the usemap remain active. Similarly,
 365          * a pgdat can prevent a section being removed. If section A
 366          * contains a pgdat and section B contains the usemap, both
 367          * sections become inter-dependent. This allocates usemaps
 368          * from the same section as the pgdat where possible to avoid
 369          * this problem.
 370          */
 371         goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
 372         limit = goal + (1UL << PA_SECTION_SHIFT);
 373         nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 374 again:
 375         usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
 376         if (!usage && limit) {
 377                 limit = 0;
 378                 goto again;
 379         }
 380         return usage;
 381 }
 382 
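/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * the first memblock request is confined to the one section that
 * already holds the pgdat, so the usemap never pins an extra section;
 * only if that fails is the limit dropped and any node-local memory
 * accepted.  The 128 MiB section size (PA_SECTION_SHIFT == 27) is the
 * usual x86_64 value and an assumption of this example.
 */
#include <stdio.h>

#define MODEL_PA_SECTION_SHIFT 27
#define MODEL_SECTION_SIZE     (1UL << MODEL_PA_SECTION_SHIFT)

int main(void)
{
        unsigned long pgdat_pa = 0x23456789UL;  /* made-up __pa(pgdat) */
        unsigned long goal  = pgdat_pa & ~(MODEL_SECTION_SIZE - 1);
        unsigned long limit = goal + MODEL_SECTION_SIZE;

        /* memblock is asked for [goal, limit) first, then [goal, anywhere) */
        printf("goal=%#lx limit=%#lx\n", goal, limit);
        return 0;
}
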
 383 static void __init check_usemap_section_nr(int nid,
 384                 struct mem_section_usage *usage)
 385 {
 386         unsigned long usemap_snr, pgdat_snr;
 387         static unsigned long old_usemap_snr;
 388         static unsigned long old_pgdat_snr;
 389         struct pglist_data *pgdat = NODE_DATA(nid);
 390         int usemap_nid;
 391 
 392         /* First call */
 393         if (!old_usemap_snr) {
 394                 old_usemap_snr = NR_MEM_SECTIONS;
 395                 old_pgdat_snr = NR_MEM_SECTIONS;
 396         }
 397 
 398         usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
 399         pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
 400         if (usemap_snr == pgdat_snr)
 401                 return;
 402 
 403         if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
 404                 /* skip redundant message */
 405                 return;
 406 
 407         old_usemap_snr = usemap_snr;
 408         old_pgdat_snr = pgdat_snr;
 409 
 410         usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
 411         if (usemap_nid != nid) {
 412                 pr_info("node %d must be removed before remove section %ld\n",
 413                         nid, usemap_snr);
 414                 return;
 415         }
 416         /*
 417          * There is a circular dependency.
  418          * Some platforms allow un-removable sections because they will
  419          * just gather other removable sections for dynamic partitioning.
  420          * Just report the un-removable section's number here.
 421          */
 422         pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
 423                 usemap_snr, pgdat_snr, nid);
 424 }
 425 #else
 426 static struct mem_section_usage * __init
 427 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 428                                          unsigned long size)
 429 {
 430         return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
 431 }
 432 
 433 static void __init check_usemap_section_nr(int nid,
 434                 struct mem_section_usage *usage)
 435 {
 436 }
 437 #endif /* CONFIG_MEMORY_HOTREMOVE */
 438 
 439 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 440 static unsigned long __init section_map_size(void)
 441 {
 442         return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
 443 }
 444 
 445 #else
 446 static unsigned long __init section_map_size(void)
 447 {
 448         return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
 449 }
 450 
 451 struct page __init *__populate_section_memmap(unsigned long pfn,
 452                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 453 {
 454         unsigned long size = section_map_size();
 455         struct page *map = sparse_buffer_alloc(size);
 456         phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
 457 
 458         if (map)
 459                 return map;
 460 
 461         map = memblock_alloc_try_nid(size,
 462                                           PAGE_SIZE, addr,
 463                                           MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 464         if (!map)
 465                 panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
 466                       __func__, size, PAGE_SIZE, nid, &addr);
 467 
 468         return map;
 469 }
 470 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 471 
 472 static void *sparsemap_buf __meminitdata;
 473 static void *sparsemap_buf_end __meminitdata;
 474 
 475 static inline void __meminit sparse_buffer_free(unsigned long size)
 476 {
 477         WARN_ON(!sparsemap_buf || size == 0);
 478         memblock_free_early(__pa(sparsemap_buf), size);
 479 }
 480 
 481 static void __init sparse_buffer_init(unsigned long size, int nid)
 482 {
 483         phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
 484         WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
 485         sparsemap_buf =
 486                 memblock_alloc_try_nid_raw(size, PAGE_SIZE,
 487                                                 addr,
 488                                                 MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 489         sparsemap_buf_end = sparsemap_buf + size;
 490 }
 491 
 492 static void __init sparse_buffer_fini(void)
 493 {
 494         unsigned long size = sparsemap_buf_end - sparsemap_buf;
 495 
 496         if (sparsemap_buf && size > 0)
 497                 sparse_buffer_free(size);
 498         sparsemap_buf = NULL;
 499 }
 500 
 501 void * __meminit sparse_buffer_alloc(unsigned long size)
 502 {
 503         void *ptr = NULL;
 504 
 505         if (sparsemap_buf) {
 506                 ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
 507                 if (ptr + size > sparsemap_buf_end)
 508                         ptr = NULL;
 509                 else {
 510                         /* Free redundant aligned space */
 511                         if ((unsigned long)(ptr - sparsemap_buf) > 0)
 512                                 sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
 513                         sparsemap_buf = ptr + size;
 514                 }
 515         }
 516         return ptr;
 517 }
 518 
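/*
 * Illustrative sketch (standalone userspace model, not kernel code):
 * sparse_buffer_alloc() is a bump allocator over one big preallocated
 * per-node buffer.  Each request is aligned to its own (power-of-two)
 * size and any bytes skipped by the alignment are handed back, the way
 * the real code returns them to memblock.  Buffer and request sizes
 * are made up.
 */
#include <stdint.h>
#include <stdio.h>

static char model_buf[1 << 16];
static char *model_cur = model_buf;
static char *model_end = model_buf + sizeof(model_buf);

static void *model_buffer_alloc(unsigned long size)
{
        uintptr_t aligned = ((uintptr_t)model_cur + size - 1) / size * size;

        if (aligned + size > (uintptr_t)model_end)
                return NULL;                    /* caller falls back to memblock */
        if ((char *)aligned != model_cur)       /* sparse_buffer_free() the gap */
                printf("freeing %zu leftover bytes\n",
                       (size_t)((char *)aligned - model_cur));
        model_cur = (char *)aligned + size;
        return (char *)aligned;
}

int main(void)
{
        void *a = model_buffer_alloc(4096);
        void *b = model_buffer_alloc(4096);     /* contiguous with a, no gap */

        printf("a=%p b=%p\n", a, b);
        return 0;
}
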
 519 void __weak __meminit vmemmap_populate_print_last(void)
 520 {
 521 }
 522 
 523 /*
  524  * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end),
  525  * and the number of present sections in this node is map_count.
 526  */
 527 static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
 528                                    unsigned long pnum_end,
 529                                    unsigned long map_count)
 530 {
 531         struct mem_section_usage *usage;
 532         unsigned long pnum;
 533         struct page *map;
 534 
 535         usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
 536                         mem_section_usage_size() * map_count);
 537         if (!usage) {
 538                 pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
 539                 goto failed;
 540         }
 541         sparse_buffer_init(map_count * section_map_size(), nid);
 542         for_each_present_section_nr(pnum_begin, pnum) {
 543                 unsigned long pfn = section_nr_to_pfn(pnum);
 544 
 545                 if (pnum >= pnum_end)
 546                         break;
 547 
 548                 map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
 549                                 nid, NULL);
 550                 if (!map) {
 551                         pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
 552                                __func__, nid);
 553                         pnum_begin = pnum;
 554                         goto failed;
 555                 }
 556                 check_usemap_section_nr(nid, usage);
 557                 sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
 558                                 SECTION_IS_EARLY);
 559                 usage = (void *) usage + mem_section_usage_size();
 560         }
 561         sparse_buffer_fini();
 562         return;
 563 failed:
  564         /* We failed to allocate; mark all the following pnums as not present */
 565         for_each_present_section_nr(pnum_begin, pnum) {
 566                 struct mem_section *ms;
 567 
 568                 if (pnum >= pnum_end)
 569                         break;
 570                 ms = __nr_to_section(pnum);
 571                 ms->section_mem_map = 0;
 572         }
 573 }
 574 
 575 /*
 576  * Allocate the accumulated non-linear sections, allocate a mem_map
 577  * for each and record the physical to section mapping.
 578  */
 579 void __init sparse_init(void)
 580 {
 581         unsigned long pnum_begin = first_present_section_nr();
 582         int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
 583         unsigned long pnum_end, map_count = 1;
 584 
 585         /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
 586         set_pageblock_order();
 587 
 588         for_each_present_section_nr(pnum_begin + 1, pnum_end) {
 589                 int nid = sparse_early_nid(__nr_to_section(pnum_end));
 590 
 591                 if (nid == nid_begin) {
 592                         map_count++;
 593                         continue;
 594                 }
 595                 /* Init node with sections in range [pnum_begin, pnum_end) */
 596                 sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
 597                 nid_begin = nid;
 598                 pnum_begin = pnum_end;
 599                 map_count = 1;
 600         }
 601         /* cover the last node */
 602         sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
 603         vmemmap_populate_print_last();
 604 }
 605 
 606 #ifdef CONFIG_MEMORY_HOTPLUG
 607 
 608 /* Mark all memory sections within the pfn range as online */
 609 void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 610 {
 611         unsigned long pfn;
 612 
 613         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 614                 unsigned long section_nr = pfn_to_section_nr(pfn);
 615                 struct mem_section *ms;
 616 
 617                 /* onlining code should never touch invalid ranges */
 618                 if (WARN_ON(!valid_section_nr(section_nr)))
 619                         continue;
 620 
 621                 ms = __nr_to_section(section_nr);
 622                 ms->section_mem_map |= SECTION_IS_ONLINE;
 623         }
 624 }
 625 
 626 #ifdef CONFIG_MEMORY_HOTREMOVE
 627 /* Mark all memory sections within the pfn range as offline */
 628 void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 629 {
 630         unsigned long pfn;
 631 
 632         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
 633                 unsigned long section_nr = pfn_to_section_nr(pfn);
 634                 struct mem_section *ms;
 635 
 636                 /*
  637                  * TODO: this needs some double checking. The offlining code
  638                  * makes sure to check pfn_valid(), but those checks might be bogus.
 639                  */
 640                 if (WARN_ON(!valid_section_nr(section_nr)))
 641                         continue;
 642 
 643                 ms = __nr_to_section(section_nr);
 644                 ms->section_mem_map &= ~SECTION_IS_ONLINE;
 645         }
 646 }
 647 #endif
 648 
 649 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 650 static struct page * __meminit populate_section_memmap(unsigned long pfn,
 651                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 652 {
 653         return __populate_section_memmap(pfn, nr_pages, nid, altmap);
 654 }
 655 
 656 static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
 657                 struct vmem_altmap *altmap)
 658 {
 659         unsigned long start = (unsigned long) pfn_to_page(pfn);
 660         unsigned long end = start + nr_pages * sizeof(struct page);
 661 
 662         vmemmap_free(start, end, altmap);
 663 }
 664 static void free_map_bootmem(struct page *memmap)
 665 {
 666         unsigned long start = (unsigned long)memmap;
 667         unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 668 
 669         vmemmap_free(start, end, NULL);
 670 }
 671 #else
 672 struct page * __meminit populate_section_memmap(unsigned long pfn,
 673                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
 674 {
 675         struct page *page, *ret;
 676         unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
 677 
 678         page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
 679         if (page)
 680                 goto got_map_page;
 681 
 682         ret = vmalloc(memmap_size);
 683         if (ret)
 684                 goto got_map_ptr;
 685 
 686         return NULL;
 687 got_map_page:
 688         ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
 689 got_map_ptr:
 690 
 691         return ret;
 692 }
 693 
 694 static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
 695                 struct vmem_altmap *altmap)
 696 {
 697         struct page *memmap = pfn_to_page(pfn);
 698 
 699         if (is_vmalloc_addr(memmap))
 700                 vfree(memmap);
 701         else
 702                 free_pages((unsigned long)memmap,
 703                            get_order(sizeof(struct page) * PAGES_PER_SECTION));
 704 }
 705 
 706 static void free_map_bootmem(struct page *memmap)
 707 {
 708         unsigned long maps_section_nr, removing_section_nr, i;
 709         unsigned long magic, nr_pages;
 710         struct page *page = virt_to_page(memmap);
 711 
 712         nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
 713                 >> PAGE_SHIFT;
 714 
 715         for (i = 0; i < nr_pages; i++, page++) {
 716                 magic = (unsigned long) page->freelist;
 717 
 718                 BUG_ON(magic == NODE_INFO);
 719 
 720                 maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
 721                 removing_section_nr = page_private(page);
 722 
 723                 /*
  724                  * When this function is called, the section being removed is
  725                  * in a logically offlined state, i.e. all of its pages are
  726                  * isolated from the page allocator. If the section's memmap
  727                  * is placed within the section being removed, it must not be
  728                  * freed here: the page allocator could otherwise hand it out
  729                  * again, even though it is about to be removed physically.
 730                  */
 731                 if (maps_section_nr != removing_section_nr)
 732                         put_page_bootmem(page);
 733         }
 734 }
 735 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 736 
 737 static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
 738                 struct vmem_altmap *altmap)
 739 {
 740         DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
 741         DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
 742         struct mem_section *ms = __pfn_to_section(pfn);
 743         bool section_is_early = early_section(ms);
 744         struct page *memmap = NULL;
 745         bool empty;
 746         unsigned long *subsection_map = ms->usage
 747                 ? &ms->usage->subsection_map[0] : NULL;
 748 
 749         subsection_mask_set(map, pfn, nr_pages);
 750         if (subsection_map)
 751                 bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
 752 
 753         if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
 754                                 "section already deactivated (%#lx + %ld)\n",
 755                                 pfn, nr_pages))
 756                 return;
 757 
 758         /*
 759          * There are 3 cases to handle across two configurations
 760          * (SPARSEMEM_VMEMMAP={y,n}):
 761          *
 762          * 1/ deactivation of a partial hot-added section (only possible
 763          * in the SPARSEMEM_VMEMMAP=y case).
 764          *    a/ section was present at memory init
 765          *    b/ section was hot-added post memory init
 766          * 2/ deactivation of a complete hot-added section
 767          * 3/ deactivation of a complete section from memory init
 768          *
  769          * For 1/, when the subsection_map is not empty we will not be
  770          * freeing the usage map, but we still need to free the vmemmap
  771          * range.
 772          *
 773          * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
 774          */
 775         bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
 776         empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION);
 777         if (empty) {
 778                 unsigned long section_nr = pfn_to_section_nr(pfn);
 779 
 780                 /*
 781                  * When removing an early section, the usage map is kept (as the
 782                  * usage maps of other sections fall into the same page). It
 783                  * will be re-used when re-adding the section - which is then no
 784                  * longer an early section. If the usage map is PageReserved, it
 785                  * was allocated during boot.
 786                  */
 787                 if (!PageReserved(virt_to_page(ms->usage))) {
 788                         kfree(ms->usage);
 789                         ms->usage = NULL;
 790                 }
 791                 memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
 792                 /*
 793                  * Mark the section invalid so that valid_section()
  794                  * returns false. This prevents code from dereferencing
  795                  * the ms->usage array.
 796                  */
 797                 ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
 798         }
 799 
 800         if (section_is_early && memmap)
 801                 free_map_bootmem(memmap);
 802         else
 803                 depopulate_section_memmap(pfn, nr_pages, altmap);
 804 
 805         if (empty)
 806                 ms->section_mem_map = (unsigned long)NULL;
 807 }
 808 
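/*
 * Illustrative sketch (standalone userspace model, not kernel code) of
 * the subsection bookkeeping above: the range being removed must be a
 * subset of the active subsections, XOR then clears exactly those
 * bits, and an empty result means the whole section can be torn down.
 * A single 64-bit word stands in for the subsection bitmap (one bit
 * per subsection, as on x86_64); the masks are hand-picked.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t subsection_map = 0x0f0;        /* subsections 4-7 active   */
        uint64_t map = 0x030;                   /* removing subsections 4-5 */

        /* the WARN() above fires if the removal touches inactive bits */
        if ((map & subsection_map) != map) {
                printf("section already deactivated\n");
                return 1;
        }

        /* bitmap_xor(): drop the removed subsections from the map */
        subsection_map ^= map;
        printf("remaining %#llx, empty=%d\n",
               (unsigned long long)subsection_map,
               subsection_map == 0);            /* remaining 0xc0, empty=0 */
        return 0;
}
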
 809 static struct page * __meminit section_activate(int nid, unsigned long pfn,
 810                 unsigned long nr_pages, struct vmem_altmap *altmap)
 811 {
 812         DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
 813         struct mem_section *ms = __pfn_to_section(pfn);
 814         struct mem_section_usage *usage = NULL;
 815         unsigned long *subsection_map;
 816         struct page *memmap;
 817         int rc = 0;
 818 
 819         subsection_mask_set(map, pfn, nr_pages);
 820 
 821         if (!ms->usage) {
 822                 usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
 823                 if (!usage)
 824                         return ERR_PTR(-ENOMEM);
 825                 ms->usage = usage;
 826         }
 827         subsection_map = &ms->usage->subsection_map[0];
 828 
 829         if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
 830                 rc = -EINVAL;
 831         else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
 832                 rc = -EEXIST;
 833         else
 834                 bitmap_or(subsection_map, map, subsection_map,
 835                                 SUBSECTIONS_PER_SECTION);
 836 
 837         if (rc) {
 838                 if (usage)
 839                         ms->usage = NULL;
 840                 kfree(usage);
 841                 return ERR_PTR(rc);
 842         }
 843 
 844         /*
 845          * The early init code does not consider partially populated
  846          * initial sections; it simply assumes that memory will never be
 847          * referenced.  If we hot-add memory into such a section then we
 848          * do not need to populate the memmap and can simply reuse what
 849          * is already there.
 850          */
 851         if (nr_pages < PAGES_PER_SECTION && early_section(ms))
 852                 return pfn_to_page(pfn);
 853 
 854         memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
 855         if (!memmap) {
 856                 section_deactivate(pfn, nr_pages, altmap);
 857                 return ERR_PTR(-ENOMEM);
 858         }
 859 
 860         return memmap;
 861 }
 862 
 863 /**
 864  * sparse_add_section - add a memory section, or populate an existing one
 865  * @nid: The node to add section on
 866  * @start_pfn: start pfn of the memory range
 867  * @nr_pages: number of pfns to add in the section
 868  * @altmap: device page map
 869  *
 870  * This is only intended for hotplug.
 871  *
 872  * Return:
 873  * * 0          - On success.
  874  * * -EEXIST    - Section is already present.
 875  * * -ENOMEM    - Out of memory.
 876  */
 877 int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 878                 unsigned long nr_pages, struct vmem_altmap *altmap)
 879 {
 880         unsigned long section_nr = pfn_to_section_nr(start_pfn);
 881         struct mem_section *ms;
 882         struct page *memmap;
 883         int ret;
 884 
 885         ret = sparse_index_init(section_nr, nid);
 886         if (ret < 0)
 887                 return ret;
 888 
 889         memmap = section_activate(nid, start_pfn, nr_pages, altmap);
 890         if (IS_ERR(memmap))
 891                 return PTR_ERR(memmap);
 892 
 893         /*
 894          * Poison uninitialized struct pages in order to catch invalid flags
 895          * combinations.
 896          */
 897         page_init_poison(memmap, sizeof(struct page) * nr_pages);
 898 
 899         ms = __nr_to_section(section_nr);
 900         set_section_nid(section_nr, nid);
 901         section_mark_present(ms);
 902 
 903         /* Align memmap to section boundary in the subsection case */
 904         if (section_nr_to_pfn(section_nr) != start_pfn)
 905                 memmap = pfn_to_kaddr(section_nr_to_pfn(section_nr));
 906         sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
 907 
 908         return 0;
 909 }
 910 
 911 #ifdef CONFIG_MEMORY_FAILURE
 912 static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 913 {
 914         int i;
 915 
 916         /*
 917          * A further optimization is to have per section refcounted
 918          * num_poisoned_pages.  But that would need more space per memmap, so
 919          * for now just do a quick global check to speed up this routine in the
 920          * absence of bad pages.
 921          */
 922         if (atomic_long_read(&num_poisoned_pages) == 0)
 923                 return;
 924 
 925         for (i = 0; i < nr_pages; i++) {
 926                 if (PageHWPoison(&memmap[i])) {
 927                         num_poisoned_pages_dec();
 928                         ClearPageHWPoison(&memmap[i]);
 929                 }
 930         }
 931 }
 932 #else
 933 static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 934 {
 935 }
 936 #endif
 937 
 938 void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
 939                 unsigned long nr_pages, unsigned long map_offset,
 940                 struct vmem_altmap *altmap)
 941 {
 942         clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
 943                         nr_pages - map_offset);
 944         section_deactivate(pfn, nr_pages, altmap);
 945 }
 946 #endif /* CONFIG_MEMORY_HOTPLUG */
