/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque	<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

static void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer_base *base;
	struct inet_peer *peer;

	base = inetpeer_base_ptr(rt->_rt6i_peer);
	if (!base)
		return;

	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
	if (peer) {
		if (!rt6_set_peer(rt, peer))
			inet_putpeer(peer);
	}
}

static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
{
	if (rt6_has_peer(rt))
		return rt6_peer_ptr(rt);

	rt6_bind_peer(rt, create);
	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
}

static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	return __rt6_get_peer(rt, 1);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct inet_peer *peer;
	u32 *p = NULL;

	if (!(rt->dst.flags & DST_HOST))
		return dst_cow_metrics_generic(dst, old);

	peer = rt6_get_peer_create(rt);
	if (peer) {
		u32 *old_p = __DST_METRICS_PTR(old);
		unsigned long prev, new;

		p = peer->metrics;
		if (inet_metrics_new(peer) ||
		    (old & DST_METRICS_FORCE_OVERWRITE))
			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			p = __DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		}
	}
	return p;
}
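/*
 * Note on ipv6_cow_metrics() above (explanatory comment added here, not part
 * of the original code): host routes (DST_HOST) share their metrics with the
 * inet_peer entry for the destination.  The switch from the old, possibly
 * read-only, metrics to the peer's writable array is done with a single
 * cmpxchg() on dst->_metrics, so concurrent writers race safely; whoever
 * loses the race simply adopts the pointer installed by the winner (or backs
 * off if that pointer is read-only).  Illustrative sequence:
 *
 *	old  = dst->_metrics;			// read-only template
 *	new  = (unsigned long)peer->metrics;	// writable per-peer array
 *	prev = cmpxchg(&dst->_metrics, old, new);
 *	if (prev != old)			// somebody else won the race
 *		p = __DST_METRICS_PTR(prev);	// or NULL if still read-only
 */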
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static struct dst_ops ip6_dst_ops_template = {
	.family = AF_INET6,
	.gc = ip6_dst_gc,
	.gc_thresh = 1024,
	.check = ip6_dst_check,
	.default_advmss = ip6_default_advmss,
	.mtu = ip6_mtu,
	.cow_metrics = ipv6_cow_metrics,
	.destroy = ip6_dst_destroy,
	.ifdown = ip6_dst_ifdown,
	.negative_advice = ip6_negative_advice,
	.link_failure = ip6_link_failure,
	.update_pmtu = ip6_rt_update_pmtu,
	.redirect = rt6_do_redirect,
	.local_out = __ip6_local_out,
	.neigh_lookup = ip6_neigh_lookup,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
					 unsigned long old)
{
	return NULL;
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family = AF_INET6,
	.destroy = ip6_dst_destroy,
	.check = ip6_dst_check,
	.mtu = ip6_blackhole_mtu,
	.default_advmss = ip6_default_advmss,
	.update_pmtu = ip6_rt_blackhole_update_pmtu,
	.redirect = ip6_rt_blackhole_redirect,
	.cow_metrics = ip6_rt_blackhole_cow_metrics,
	.neigh_lookup = ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -ENETUNREACH,
		.input = ip6_pkt_discard,
		.output = ip6_pkt_discard_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = DST_OBSOLETE_FORCE_CHK,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard_sk,
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif

/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct net *net,
					     struct net_device *dev,
					     int flags,
					     struct fib6_table *table)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		struct dst_entry *dst = &rt->dst;

		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
		INIT_LIST_HEAD(&rt->rt6i_siblings);
	}
	return rt;
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct dst_entry *from = dst->from;

	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);

	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
		inet_putpeer(peer);
	}
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 *   Hash-based function using packet header and flow label.
 *   Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= ipv6_addr_hash(&fl6->daddr);
	val ^= ipv6_addr_hash(&fl6->saddr);

	/* Works only if this is not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC 6438 recommends using the flow label */
	val ^= (__force u32)fl6->flowlabel;

	/* Perhaps we need to tune this function? */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_chosen;

	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route if route_chosen == 0
	 * (the sibling list does not include ourselves)
	 */
	if (route_chosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_chosen--;
			if (route_chosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}
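/*
 * Illustration of the multipath selection above (comment added for clarity,
 * not part of the original code).  For a flow with N = rt6i_nsiblings + 1
 * equal-cost routes, the hash of (saddr, daddr, proto, ports or ICMP
 * type/code, flow label) is reduced modulo N.  A result of 0 keeps the
 * first matching route; k > 0 walks k entries into the sibling list,
 * provided that sibling still scores >= 0:
 *
 *	h = rt6_info_hash_nhsfn(3, &fl6);	// 3 equal-cost routes
 *	// h == 0 -> keep 'match'
 *	// h == 1 -> first sibling, h == 2 -> second sibling
 *
 * Because the hash only looks at header fields, all packets of one flow
 * keep using the same nexthop.
 */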
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			goto out;
	}

	if (!neigh ||
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct __rt6_probe_work *work;

		work = kmalloc(sizeof(*work), GFP_ATOMIC);

		if (neigh && work)
			__neigh_set_probe_once(neigh);

		if (neigh)
			write_unlock(&neigh->lock);

		if (work) {
			INIT_WORK(&work->work, rt6_probe_deferred);
			work->target = rt->rt6i_gateway;
			dev_hold(rt->dst.dev);
			work->dev = rt->dst.dev;
			schedule_work(&work->work);
		}
	} else {
out:
		write_unlock(&neigh->lock);
	}
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
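/*
 * Note on rt6_probe() above (comment added for clarity, not part of the
 * original code): the probe itself, a neighbour solicitation sent to the
 * gateway's solicited-node multicast address, is not transmitted here.
 * rt6_probe() only marks the neighbour with __neigh_set_probe_once() and
 * schedules rt6_probe_deferred() on a workqueue, which sends the NS from
 * process context.  A new probe is scheduled only when the neighbour has
 * not been updated for idev->cnf.rtr_probe_interval jiffies, which is how
 * the "no more than one per minute" requirement quoted above is enforced.
 */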
/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
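/*
 * Illustration of the scoring used by rt6_select()/rt6_score_route()
 * (comment added for clarity, not part of the original code).  The score m
 * is a small bitmask:
 *
 *	value 2: the route's device matches the requested oif (or no oif)
 *	value 1: loopback route whose idev matches the oif
 *	bits 2 and up: decoded router preference (CONFIG_IPV6_ROUTER_PREF),
 *	               i.e. IPV6_DECODE_PREF(...) << 2
 *
 * Negative values are the rt6_nud_state failures from rt6_check_neigh().
 * find_rr_leaf() keeps the route with the highest m among entries sharing
 * the smallest metric; when the best candidate only reached
 * RT6_NUD_FAIL_DO_RR, rt6_select() advances fn->rr_ptr so the next lookup
 * starts from the following sibling (round-robin over default routers).
 */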
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
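/*
 * Worked example for the sanity checks in rt6_route_rcv() above (comment
 * added for clarity, not part of the original code).  The Route Information
 * Option's length field is counted in 8-octet units, so longer prefixes
 * need a longer option:
 *
 *	0  < prefix_len <= 64	-> length must be at least 1
 *	64 < prefix_len <= 128	-> length must be at least 2
 *	prefix_len > 128 or length > 3 -> rejected
 *
 * Only with length == 3 is the full 16-byte prefix taken verbatim;
 * otherwise ipv6_addr_prefix() masks the advertised bytes down to
 * prefix_len bits.  A lifetime of 0 deletes any matching route.
 */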
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed.  In any case, if the caller does not hold a reference,
   the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
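/*
 * The two helpers below create per-destination copies of a fib entry for
 * the routing cache; ip6_pol_route() further down picks between them
 * (summary added for clarity, not part of the original code):
 *
 *   - rt6_alloc_cow(): used when the matched route has neither RTF_GATEWAY
 *     nor RTF_NONEXTHOP.  The copy is narrowed to a /128 host route, may
 *     gain RTF_ANYCAST when the destination equals the route's prefix
 *     address, and (with CONFIG_IPV6_SUBTREES) records the source address.
 *
 *   - rt6_alloc_clone(): used when the route does have a gateway (or
 *     RTF_NONEXTHOP) but is not yet a DST_HOST entry; it only narrows the
 *     destination and sets RTF_CACHE.
 *
 * Either result is inserted with ip6_ins_rt(); if insertion races with
 * another CPU, ip6_pol_route() retries the lookup a few times.
 */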
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt)
		rt->rt6i_flags |= RTF_CACHE;
	return rt;
}

static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

redo_fib6_lookup_lock:
	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		} else {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			goto out2;
		}
	}

	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (rt->rt6i_flags & RTF_CACHE)
		goto out2;

	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	ip6_rt_put(rt);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	ip6_rt_put(rt);
	goto redo_fib6_lookup_lock;

out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
		rt6_init_peer(rt, net->ipv6.peers);

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
/*
 *	Destination cache support functions
 */

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return dst;
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			if (ip6_del_rt(rt))
				dst_free(&rt->dst);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;

		dst_metric_set(dst, RTAX_MTU, mtu);
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
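/*
 * Example of how the exported PMTU helpers above can be used (illustration
 * only, not part of the original code).  A caller holding an skb for which
 * an ICMPv6 Packet Too Big was received may propagate the new value with:
 *
 *	ip6_update_pmtu(skb, net, htonl(new_mtu), 0, 0);
 *
 * ip6_rt_update_pmtu() then clamps the value to IPV6_MIN_MTU (1280),
 * stores it in the RTAX_MTU metric of the matching /128 cache entry and
 * arms an expiry of net->ipv6.sysctl.ip6_rt_mtu_expires, so the lowered
 * MTU is forgotten again once the cached route expires.
 */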
/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from an appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};

static struct dst_entry *ip6_route_redirect(struct net *net,
					    const struct flowi6 *fl6,
					    const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}

void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
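/*
 * Worked example for ip6_default_advmss() above (comment added for
 * clarity, not part of the original code).  For a standard Ethernet
 * device (MTU 1500):
 *
 *	advmss = 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 *	       = 1500 - 40 - 20 = 1440
 *
 * The result is never advertised below ip6_rt_min_advmss, and anything
 * larger than IPV6_MAXPLEN - 20 is reported as IPV6_MAXPLEN (65535),
 * which by the convention above means "any MSS, rely on PMTU discovery".
 */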
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	struct inet6_dev *idev;
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}

static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0, NULL);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}

static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);

		if (type) {
			u32 val;

			if (unlikely(type > RTAX_MAX))
				goto err;
			if (type == RTAX_CC_ALGO) {
				char tmp[TCP_CA_NAME_MAX];

				nla_strlcpy(tmp, nla, sizeof(tmp));
				val = tcp_ca_get_key_by_name(tmp);
				if (val == TCP_CA_UNSPEC)
					goto err;
			} else {
				val = nla_get_u32(nla);
			}

			mp[type - 1] = val;
			__set_bit(type - 1, mxc->mx_valid);
		}
	}

	mxc->mx = mp;

	return 0;
err:
	kfree(mp);
	return -EINVAL;
}
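/*
 * Illustration of ip6_convert_metrics() above (comment added for clarity,
 * not part of the original code).  RTA_METRICS netlink attributes are
 * copied into a flat array indexed by RTAX_* - 1, together with a validity
 * bitmap, e.g. for a request carrying "mtu 1400 hoplimit 32":
 *
 *	mp[RTAX_MTU - 1]      = 1400;  __set_bit(RTAX_MTU - 1, mx_valid);
 *	mp[RTAX_HOPLIMIT - 1] = 32;    __set_bit(RTAX_HOPLIMIT - 1, mx_valid);
 *
 * RTAX_CC_ALGO is special-cased: the attribute carries a congestion
 * control name, which is translated to a key via tcp_ca_get_key_by_name().
 */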
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128) {
		rt->dst.flags |= DST_HOST;
		dst_metrics_set_force_overwrite(&rt->dst);
	}

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}
	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using non-link-local
			   addresses as nexthop addresses.
			   Otherwise, the router will not be able to send
			   redirects.  That is very good, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}

int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt = NULL;
	int err;

	err = ip6_route_info_create(cfg, &rt);
	if (err)
		goto out;

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}

static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}

int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}

static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_copy(rt, &msg->dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
/*
 *	Misc support functions
 */

static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
				    const struct in6_addr *dest)
{
	struct net *net = dev_net(ort->dst.dev);
	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
					    ort->rt6i_table);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;
		rt->dst.flags |= DST_HOST;

		rt->rt6i_dst.addr = *dest;
		rt->rt6i_dst.plen = 128;
		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;

		if (ort->rt6i_flags & RTF_GATEWAY)
			rt->rt6i_gateway = ort->rt6i_gateway;
		else
			rt->rt6i_gateway = *dest;
		rt->rt6i_flags = ort->rt6i_flags;
		rt6_set_from(rt, ort);
		rt->rt6i_metric = 0;

#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_INFO,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = ifindex,
		.fc_dst_len = prefixlen,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
			    RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_DFLT,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = dev->ifindex,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			    RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}

int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
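/*
 * Userspace view of ipv6_route_ioctl() above (illustration only, not part
 * of the original code).  The legacy route(8)-style interface fills a
 * struct in6_rtmsg and issues SIOCADDRT/SIOCDELRT on an AF_INET6 socket;
 * rtmsg_to_fib6_config() then maps it onto the main table:
 *
 *	struct in6_rtmsg rtm = {
 *		.rtmsg_dst     = ...,			// e.g. 2001:db8::
 *		.rtmsg_dst_len = 64,
 *		.rtmsg_gateway = ...,			// e.g. fe80::1
 *		.rtmsg_ifindex = if_nametoindex("eth0"),
 *		.rtmsg_flags   = RTF_UP | RTF_GATEWAY,
 *	};
 *	ioctl(fd, SIOCADDRT, &rtm);		// requires CAP_NET_ADMIN
 *
 * New code should prefer the rtnetlink interface instead.
 */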
packet on the floor 2179 */ 2180 2181static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 2182{ 2183 int type; 2184 struct dst_entry *dst = skb_dst(skb); 2185 switch (ipstats_mib_noroutes) { 2186 case IPSTATS_MIB_INNOROUTES: 2187 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 2188 if (type == IPV6_ADDR_ANY) { 2189 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2190 IPSTATS_MIB_INADDRERRORS); 2191 break; 2192 } 2193 /* FALLTHROUGH */ 2194 case IPSTATS_MIB_OUTNOROUTES: 2195 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2196 ipstats_mib_noroutes); 2197 break; 2198 } 2199 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2200 kfree_skb(skb); 2201 return 0; 2202} 2203 2204static int ip6_pkt_discard(struct sk_buff *skb) 2205{ 2206 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2207} 2208 2209static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) 2210{ 2211 skb->dev = skb_dst(skb)->dev; 2212 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2213} 2214 2215static int ip6_pkt_prohibit(struct sk_buff *skb) 2216{ 2217 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2218} 2219 2220static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) 2221{ 2222 skb->dev = skb_dst(skb)->dev; 2223 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2224} 2225 2226/* 2227 * Allocate a dst for local (unicast / anycast) address. 2228 */ 2229 2230struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2231 const struct in6_addr *addr, 2232 bool anycast) 2233{ 2234 struct net *net = dev_net(idev->dev); 2235 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 2236 DST_NOCOUNT, NULL); 2237 if (!rt) 2238 return ERR_PTR(-ENOMEM); 2239 2240 in6_dev_hold(idev); 2241 2242 rt->dst.flags |= DST_HOST; 2243 rt->dst.input = ip6_input; 2244 rt->dst.output = ip6_output; 2245 rt->rt6i_idev = idev; 2246 2247 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2248 if (anycast) 2249 rt->rt6i_flags |= RTF_ANYCAST; 2250 else 2251 rt->rt6i_flags |= RTF_LOCAL; 2252 2253 rt->rt6i_gateway = *addr; 2254 rt->rt6i_dst.addr = *addr; 2255 rt->rt6i_dst.plen = 128; 2256 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2257 2258 atomic_set(&rt->dst.__refcnt, 1); 2259 2260 return rt; 2261} 2262 2263int ip6_route_get_saddr(struct net *net, 2264 struct rt6_info *rt, 2265 const struct in6_addr *daddr, 2266 unsigned int prefs, 2267 struct in6_addr *saddr) 2268{ 2269 struct inet6_dev *idev = 2270 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; 2271 int err = 0; 2272 if (rt && rt->rt6i_prefsrc.plen) 2273 *saddr = rt->rt6i_prefsrc.addr; 2274 else 2275 err = ipv6_dev_get_saddr(net, idev ? 
idev->dev : NULL, 2276 daddr, prefs, saddr); 2277 return err; 2278} 2279 2280/* remove the deleted address from prefsrc entries */ 2281struct arg_dev_net_ip { 2282 struct net_device *dev; 2283 struct net *net; 2284 struct in6_addr *addr; 2285}; 2286 2287static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 2288{ 2289 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 2290 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2291 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2292 2293 if (((void *)rt->dst.dev == dev || !dev) && 2294 rt != net->ipv6.ip6_null_entry && 2295 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2296 /* remove prefsrc entry */ 2297 rt->rt6i_prefsrc.plen = 0; 2298 } 2299 return 0; 2300} 2301 2302void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 2303{ 2304 struct net *net = dev_net(ifp->idev->dev); 2305 struct arg_dev_net_ip adni = { 2306 .dev = ifp->idev->dev, 2307 .net = net, 2308 .addr = &ifp->addr, 2309 }; 2310 fib6_clean_all(net, fib6_remove_prefsrc, &adni); 2311} 2312 2313#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) 2314#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) 2315 2316/* Remove routers and update dst entries when a gateway turns into a host. */ 2317static int fib6_clean_tohost(struct rt6_info *rt, void *arg) 2318{ 2319 struct in6_addr *gateway = (struct in6_addr *)arg; 2320 2321 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || 2322 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && 2323 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { 2324 return -1; 2325 } 2326 return 0; 2327} 2328 2329void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 2330{ 2331 fib6_clean_all(net, fib6_clean_tohost, gateway); 2332} 2333 2334struct arg_dev_net { 2335 struct net_device *dev; 2336 struct net *net; 2337}; 2338 2339static int fib6_ifdown(struct rt6_info *rt, void *arg) 2340{ 2341 const struct arg_dev_net *adn = arg; 2342 const struct net_device *dev = adn->dev; 2343 2344 if ((rt->dst.dev == dev || !dev) && 2345 rt != adn->net->ipv6.ip6_null_entry) 2346 return -1; 2347 2348 return 0; 2349} 2350 2351void rt6_ifdown(struct net *net, struct net_device *dev) 2352{ 2353 struct arg_dev_net adn = { 2354 .dev = dev, 2355 .net = net, 2356 }; 2357 2358 fib6_clean_all(net, fib6_ifdown, &adn); 2359 icmp6_clean_all(fib6_ifdown, &adn); 2360} 2361 2362struct rt6_mtu_change_arg { 2363 struct net_device *dev; 2364 unsigned int mtu; 2365}; 2366 2367static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2368{ 2369 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2370 struct inet6_dev *idev; 2371 2372 /* In IPv6, PMTU discovery is not optional, 2373 so the RTAX_MTU lock cannot disable it. 2374 We still use this lock to block changes 2375 caused by addrconf/ndisc. 2376 */ 2377 2378 idev = __in6_dev_get(arg->dev); 2379 if (!idev) 2380 return 0; 2381 2382 /* For an administrative MTU increase, there is no way to discover 2383 the IPv6 PMTU increase, so the PMTU must be updated here. 2384 Since RFC 1981 doesn't cover administrative MTU increases, 2385 updating the PMTU on such an increase is a MUST (e.g. jumbo frames). 2386 */ 2387 /* 2388 If the new MTU is less than the route PMTU, the new MTU will be the 2389 lowest MTU in the path; update the route PMTU to reflect the 2390 decrease. If the new MTU is greater than the route PMTU, and the 2391 old MTU was the lowest MTU in the path, update the route PMTU 2392 to reflect the increase.
In this case, if another node on the path 2393 also has this lowest MTU, a Packet Too Big message will trigger 2394 PMTU discovery. 2395 */ 2396 if (rt->dst.dev == arg->dev && 2397 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2398 (dst_mtu(&rt->dst) >= arg->mtu || 2399 (dst_mtu(&rt->dst) < arg->mtu && 2400 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2401 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2402 } 2403 return 0; 2404} 2405 2406void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2407{ 2408 struct rt6_mtu_change_arg arg = { 2409 .dev = dev, 2410 .mtu = mtu, 2411 }; 2412 2413 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 2414} 2415 2416static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2417 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2418 [RTA_OIF] = { .type = NLA_U32 }, 2419 [RTA_IIF] = { .type = NLA_U32 }, 2420 [RTA_PRIORITY] = { .type = NLA_U32 }, 2421 [RTA_METRICS] = { .type = NLA_NESTED }, 2422 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2423 [RTA_PREF] = { .type = NLA_U8 }, 2424}; 2425 2426static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2427 struct fib6_config *cfg) 2428{ 2429 struct rtmsg *rtm; 2430 struct nlattr *tb[RTA_MAX+1]; 2431 unsigned int pref; 2432 int err; 2433 2434 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2435 if (err < 0) 2436 goto errout; 2437 2438 err = -EINVAL; 2439 rtm = nlmsg_data(nlh); 2440 memset(cfg, 0, sizeof(*cfg)); 2441 2442 cfg->fc_table = rtm->rtm_table; 2443 cfg->fc_dst_len = rtm->rtm_dst_len; 2444 cfg->fc_src_len = rtm->rtm_src_len; 2445 cfg->fc_flags = RTF_UP; 2446 cfg->fc_protocol = rtm->rtm_protocol; 2447 cfg->fc_type = rtm->rtm_type; 2448 2449 if (rtm->rtm_type == RTN_UNREACHABLE || 2450 rtm->rtm_type == RTN_BLACKHOLE || 2451 rtm->rtm_type == RTN_PROHIBIT || 2452 rtm->rtm_type == RTN_THROW) 2453 cfg->fc_flags |= RTF_REJECT; 2454 2455 if (rtm->rtm_type == RTN_LOCAL) 2456 cfg->fc_flags |= RTF_LOCAL; 2457 2458 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2459 cfg->fc_nlinfo.nlh = nlh; 2460 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2461 2462 if (tb[RTA_GATEWAY]) { 2463 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 2464 cfg->fc_flags |= RTF_GATEWAY; 2465 } 2466 2467 if (tb[RTA_DST]) { 2468 int plen = (rtm->rtm_dst_len + 7) >> 3; 2469 2470 if (nla_len(tb[RTA_DST]) < plen) 2471 goto errout; 2472 2473 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2474 } 2475 2476 if (tb[RTA_SRC]) { 2477 int plen = (rtm->rtm_src_len + 7) >> 3; 2478 2479 if (nla_len(tb[RTA_SRC]) < plen) 2480 goto errout; 2481 2482 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2483 } 2484 2485 if (tb[RTA_PREFSRC]) 2486 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 2487 2488 if (tb[RTA_OIF]) 2489 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2490 2491 if (tb[RTA_PRIORITY]) 2492 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2493 2494 if (tb[RTA_METRICS]) { 2495 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2496 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2497 } 2498 2499 if (tb[RTA_TABLE]) 2500 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2501 2502 if (tb[RTA_MULTIPATH]) { 2503 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 2504 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 2505 } 2506 2507 if (tb[RTA_PREF]) { 2508 pref = nla_get_u8(tb[RTA_PREF]); 2509 if (pref != ICMPV6_ROUTER_PREF_LOW && 2510 pref != ICMPV6_ROUTER_PREF_HIGH) 2511 pref = ICMPV6_ROUTER_PREF_MEDIUM; 2512 cfg->fc_flags |= RTF_PREF(pref); 2513 } 2514 2515 err = 0; 2516errout: 2517 return err; 2518} 2519 2520struct rt6_nh { 2521 struct
rt6_info *rt6_info; 2522 struct fib6_config r_cfg; 2523 struct mx6_config mxc; 2524 struct list_head next; 2525}; 2526 2527static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) 2528{ 2529 struct rt6_nh *nh; 2530 2531 list_for_each_entry(nh, rt6_nh_list, next) { 2532 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n", 2533 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, 2534 nh->r_cfg.fc_ifindex); 2535 } 2536} 2537 2538static int ip6_route_info_append(struct list_head *rt6_nh_list, 2539 struct rt6_info *rt, struct fib6_config *r_cfg) 2540{ 2541 struct rt6_nh *nh; 2542 struct rt6_info *rtnh; 2543 int err = -EEXIST; 2544 2545 list_for_each_entry(nh, rt6_nh_list, next) { 2546 /* check if rt6_info already exists */ 2547 rtnh = nh->rt6_info; 2548 2549 if (rtnh->dst.dev == rt->dst.dev && 2550 rtnh->rt6i_idev == rt->rt6i_idev && 2551 ipv6_addr_equal(&rtnh->rt6i_gateway, 2552 &rt->rt6i_gateway)) 2553 return err; 2554 } 2555 2556 nh = kzalloc(sizeof(*nh), GFP_KERNEL); 2557 if (!nh) 2558 return -ENOMEM; 2559 nh->rt6_info = rt; 2560 err = ip6_convert_metrics(&nh->mxc, r_cfg); 2561 if (err) { 2562 kfree(nh); 2563 return err; 2564 } 2565 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 2566 list_add_tail(&nh->next, rt6_nh_list); 2567 2568 return 0; 2569} 2570 2571static int ip6_route_multipath_add(struct fib6_config *cfg) 2572{ 2573 struct fib6_config r_cfg; 2574 struct rtnexthop *rtnh; 2575 struct rt6_info *rt; 2576 struct rt6_nh *err_nh; 2577 struct rt6_nh *nh, *nh_safe; 2578 int remaining; 2579 int attrlen; 2580 int err = 1; 2581 int nhn = 0; 2582 int replace = (cfg->fc_nlinfo.nlh && 2583 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 2584 LIST_HEAD(rt6_nh_list); 2585 2586 remaining = cfg->fc_mp_len; 2587 rtnh = (struct rtnexthop *)cfg->fc_mp; 2588 2589 /* Parse a Multipath Entry and build a list (rt6_nh_list) of 2590 * rt6_info structs per nexthop 2591 */ 2592 while (rtnh_ok(rtnh, remaining)) { 2593 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2594 if (rtnh->rtnh_ifindex) 2595 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 2596 2597 attrlen = rtnh_attrlen(rtnh); 2598 if (attrlen > 0) { 2599 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 2600 2601 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 2602 if (nla) { 2603 r_cfg.fc_gateway = nla_get_in6_addr(nla); 2604 r_cfg.fc_flags |= RTF_GATEWAY; 2605 } 2606 } 2607 2608 err = ip6_route_info_create(&r_cfg, &rt); 2609 if (err) 2610 goto cleanup; 2611 2612 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 2613 if (err) { 2614 dst_free(&rt->dst); 2615 goto cleanup; 2616 } 2617 2618 rtnh = rtnh_next(rtnh, &remaining); 2619 } 2620 2621 err_nh = NULL; 2622 list_for_each_entry(nh, &rt6_nh_list, next) { 2623 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc); 2624 /* nh->rt6_info is used or freed at this point, reset to NULL*/ 2625 nh->rt6_info = NULL; 2626 if (err) { 2627 if (replace && nhn) 2628 ip6_print_replace_route_err(&rt6_nh_list); 2629 err_nh = nh; 2630 goto add_errout; 2631 } 2632 2633 /* Because each route is added like a single route we remove 2634 * these flags after the first nexthop: if there is a collision, 2635 * we have already failed to add the first nexthop: 2636 * fib6_add_rt2node() has rejected it; when replacing, old 2637 * nexthops have been replaced by first new, the rest should 2638 * be added to it. 
2639 */ 2640 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 2641 NLM_F_REPLACE); 2642 nhn++; 2643 } 2644 2645 goto cleanup; 2646 2647add_errout: 2648 /* Delete routes that were already added */ 2649 list_for_each_entry(nh, &rt6_nh_list, next) { 2650 if (err_nh == nh) 2651 break; 2652 ip6_route_del(&nh->r_cfg); 2653 } 2654 2655cleanup: 2656 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { 2657 if (nh->rt6_info) 2658 dst_free(&nh->rt6_info->dst); 2659 if (nh->mxc.mx) 2660 kfree(nh->mxc.mx); 2661 list_del(&nh->next); 2662 kfree(nh); 2663 } 2664 2665 return err; 2666} 2667 2668static int ip6_route_multipath_del(struct fib6_config *cfg) 2669{ 2670 struct fib6_config r_cfg; 2671 struct rtnexthop *rtnh; 2672 int remaining; 2673 int attrlen; 2674 int err = 1, last_err = 0; 2675 2676 remaining = cfg->fc_mp_len; 2677 rtnh = (struct rtnexthop *)cfg->fc_mp; 2678 2679 /* Parse a Multipath Entry */ 2680 while (rtnh_ok(rtnh, remaining)) { 2681 memcpy(&r_cfg, cfg, sizeof(*cfg)); 2682 if (rtnh->rtnh_ifindex) 2683 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 2684 2685 attrlen = rtnh_attrlen(rtnh); 2686 if (attrlen > 0) { 2687 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 2688 2689 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 2690 if (nla) { 2691 nla_memcpy(&r_cfg.fc_gateway, nla, 16); 2692 r_cfg.fc_flags |= RTF_GATEWAY; 2693 } 2694 } 2695 err = ip6_route_del(&r_cfg); 2696 if (err) 2697 last_err = err; 2698 2699 rtnh = rtnh_next(rtnh, &remaining); 2700 } 2701 2702 return last_err; 2703} 2704 2705static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) 2706{ 2707 struct fib6_config cfg; 2708 int err; 2709 2710 err = rtm_to_fib6_config(skb, nlh, &cfg); 2711 if (err < 0) 2712 return err; 2713 2714 if (cfg.fc_mp) 2715 return ip6_route_multipath_del(&cfg); 2716 else 2717 return ip6_route_del(&cfg); 2718} 2719 2720static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) 2721{ 2722 struct fib6_config cfg; 2723 int err; 2724 2725 err = rtm_to_fib6_config(skb, nlh, &cfg); 2726 if (err < 0) 2727 return err; 2728 2729 if (cfg.fc_mp) 2730 return ip6_route_multipath_add(&cfg); 2731 else 2732 return ip6_route_add(&cfg); 2733} 2734 2735static inline size_t rt6_nlmsg_size(void) 2736{ 2737 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2738 + nla_total_size(16) /* RTA_SRC */ 2739 + nla_total_size(16) /* RTA_DST */ 2740 + nla_total_size(16) /* RTA_GATEWAY */ 2741 + nla_total_size(16) /* RTA_PREFSRC */ 2742 + nla_total_size(4) /* RTA_TABLE */ 2743 + nla_total_size(4) /* RTA_IIF */ 2744 + nla_total_size(4) /* RTA_OIF */ 2745 + nla_total_size(4) /* RTA_PRIORITY */ 2746 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2747 + nla_total_size(sizeof(struct rta_cacheinfo)) 2748 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 2749 + nla_total_size(1); /* RTA_PREF */ 2750} 2751 2752static int rt6_fill_node(struct net *net, 2753 struct sk_buff *skb, struct rt6_info *rt, 2754 struct in6_addr *dst, struct in6_addr *src, 2755 int iif, int type, u32 portid, u32 seq, 2756 int prefix, int nowait, unsigned int flags) 2757{ 2758 struct rtmsg *rtm; 2759 struct nlmsghdr *nlh; 2760 long expires; 2761 u32 table; 2762 2763 if (prefix) { /* user wants prefix routes only */ 2764 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2765 /* success since this is not a prefix route */ 2766 return 1; 2767 } 2768 } 2769 2770 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 2771 if (!nlh) 2772 return -EMSGSIZE; 2773 2774 rtm = nlmsg_data(nlh); 2775 rtm->rtm_family = AF_INET6; 2776 rtm->rtm_dst_len = 
rt->rt6i_dst.plen; 2777 rtm->rtm_src_len = rt->rt6i_src.plen; 2778 rtm->rtm_tos = 0; 2779 if (rt->rt6i_table) 2780 table = rt->rt6i_table->tb6_id; 2781 else 2782 table = RT6_TABLE_UNSPEC; 2783 rtm->rtm_table = table; 2784 if (nla_put_u32(skb, RTA_TABLE, table)) 2785 goto nla_put_failure; 2786 if (rt->rt6i_flags & RTF_REJECT) { 2787 switch (rt->dst.error) { 2788 case -EINVAL: 2789 rtm->rtm_type = RTN_BLACKHOLE; 2790 break; 2791 case -EACCES: 2792 rtm->rtm_type = RTN_PROHIBIT; 2793 break; 2794 case -EAGAIN: 2795 rtm->rtm_type = RTN_THROW; 2796 break; 2797 default: 2798 rtm->rtm_type = RTN_UNREACHABLE; 2799 break; 2800 } 2801 } 2802 else if (rt->rt6i_flags & RTF_LOCAL) 2803 rtm->rtm_type = RTN_LOCAL; 2804 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2805 rtm->rtm_type = RTN_LOCAL; 2806 else 2807 rtm->rtm_type = RTN_UNICAST; 2808 rtm->rtm_flags = 0; 2809 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2810 rtm->rtm_protocol = rt->rt6i_protocol; 2811 if (rt->rt6i_flags & RTF_DYNAMIC) 2812 rtm->rtm_protocol = RTPROT_REDIRECT; 2813 else if (rt->rt6i_flags & RTF_ADDRCONF) { 2814 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) 2815 rtm->rtm_protocol = RTPROT_RA; 2816 else 2817 rtm->rtm_protocol = RTPROT_KERNEL; 2818 } 2819 2820 if (rt->rt6i_flags & RTF_CACHE) 2821 rtm->rtm_flags |= RTM_F_CLONED; 2822 2823 if (dst) { 2824 if (nla_put_in6_addr(skb, RTA_DST, dst)) 2825 goto nla_put_failure; 2826 rtm->rtm_dst_len = 128; 2827 } else if (rtm->rtm_dst_len) 2828 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) 2829 goto nla_put_failure; 2830#ifdef CONFIG_IPV6_SUBTREES 2831 if (src) { 2832 if (nla_put_in6_addr(skb, RTA_SRC, src)) 2833 goto nla_put_failure; 2834 rtm->rtm_src_len = 128; 2835 } else if (rtm->rtm_src_len && 2836 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) 2837 goto nla_put_failure; 2838#endif 2839 if (iif) { 2840#ifdef CONFIG_IPV6_MROUTE 2841 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2842 int err = ip6mr_get_route(net, skb, rtm, nowait); 2843 if (err <= 0) { 2844 if (!nowait) { 2845 if (err == 0) 2846 return 0; 2847 goto nla_put_failure; 2848 } else { 2849 if (err == -EMSGSIZE) 2850 goto nla_put_failure; 2851 } 2852 } 2853 } else 2854#endif 2855 if (nla_put_u32(skb, RTA_IIF, iif)) 2856 goto nla_put_failure; 2857 } else if (dst) { 2858 struct in6_addr saddr_buf; 2859 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 2860 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 2861 goto nla_put_failure; 2862 } 2863 2864 if (rt->rt6i_prefsrc.plen) { 2865 struct in6_addr saddr_buf; 2866 saddr_buf = rt->rt6i_prefsrc.addr; 2867 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 2868 goto nla_put_failure; 2869 } 2870 2871 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2872 goto nla_put_failure; 2873 2874 if (rt->rt6i_flags & RTF_GATEWAY) { 2875 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) 2876 goto nla_put_failure; 2877 } 2878 2879 if (rt->dst.dev && 2880 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2881 goto nla_put_failure; 2882 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2883 goto nla_put_failure; 2884 2885 expires = (rt->rt6i_flags & RTF_EXPIRES) ? 
rt->dst.expires - jiffies : 0; 2886 2887 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2888 goto nla_put_failure; 2889 2890 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) 2891 goto nla_put_failure; 2892 2893 nlmsg_end(skb, nlh); 2894 return 0; 2895 2896nla_put_failure: 2897 nlmsg_cancel(skb, nlh); 2898 return -EMSGSIZE; 2899} 2900 2901int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2902{ 2903 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2904 int prefix; 2905 2906 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2907 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2908 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2909 } else 2910 prefix = 0; 2911 2912 return rt6_fill_node(arg->net, 2913 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2914 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 2915 prefix, 0, NLM_F_MULTI); 2916} 2917 2918static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) 2919{ 2920 struct net *net = sock_net(in_skb->sk); 2921 struct nlattr *tb[RTA_MAX+1]; 2922 struct rt6_info *rt; 2923 struct sk_buff *skb; 2924 struct rtmsg *rtm; 2925 struct flowi6 fl6; 2926 int err, iif = 0, oif = 0; 2927 2928 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2929 if (err < 0) 2930 goto errout; 2931 2932 err = -EINVAL; 2933 memset(&fl6, 0, sizeof(fl6)); 2934 2935 if (tb[RTA_SRC]) { 2936 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2937 goto errout; 2938 2939 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 2940 } 2941 2942 if (tb[RTA_DST]) { 2943 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2944 goto errout; 2945 2946 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 2947 } 2948 2949 if (tb[RTA_IIF]) 2950 iif = nla_get_u32(tb[RTA_IIF]); 2951 2952 if (tb[RTA_OIF]) 2953 oif = nla_get_u32(tb[RTA_OIF]); 2954 2955 if (tb[RTA_MARK]) 2956 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 2957 2958 if (iif) { 2959 struct net_device *dev; 2960 int flags = 0; 2961 2962 dev = __dev_get_by_index(net, iif); 2963 if (!dev) { 2964 err = -ENODEV; 2965 goto errout; 2966 } 2967 2968 fl6.flowi6_iif = iif; 2969 2970 if (!ipv6_addr_any(&fl6.saddr)) 2971 flags |= RT6_LOOKUP_F_HAS_SADDR; 2972 2973 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, 2974 flags); 2975 } else { 2976 fl6.flowi6_oif = oif; 2977 2978 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 2979 } 2980 2981 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2982 if (!skb) { 2983 ip6_rt_put(rt); 2984 err = -ENOBUFS; 2985 goto errout; 2986 } 2987 2988 /* Reserve room for dummy headers, this skb can pass 2989 through good chunk of routing engine. 2990 */ 2991 skb_reset_mac_header(skb); 2992 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2993 2994 skb_dst_set(skb, &rt->dst); 2995 2996 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2997 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 2998 nlh->nlmsg_seq, 0, 0, 0); 2999 if (err < 0) { 3000 kfree_skb(skb); 3001 goto errout; 3002 } 3003 3004 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 3005errout: 3006 return err; 3007} 3008 3009void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 3010{ 3011 struct sk_buff *skb; 3012 struct net *net = info->nl_net; 3013 u32 seq; 3014 int err; 3015 3016 err = -ENOBUFS; 3017 seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; 3018 3019 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 3020 if (!skb) 3021 goto errout; 3022 3023 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 3024 event, info->portid, seq, 0, 0, 0); 3025 if (err < 0) { 3026 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 3027 WARN_ON(err == -EMSGSIZE); 3028 kfree_skb(skb); 3029 goto errout; 3030 } 3031 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 3032 info->nlh, gfp_any()); 3033 return; 3034errout: 3035 if (err < 0) 3036 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 3037} 3038 3039static int ip6_route_dev_notify(struct notifier_block *this, 3040 unsigned long event, void *ptr) 3041{ 3042 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 3043 struct net *net = dev_net(dev); 3044 3045 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 3046 net->ipv6.ip6_null_entry->dst.dev = dev; 3047 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 3048#ifdef CONFIG_IPV6_MULTIPLE_TABLES 3049 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 3050 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 3051 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 3052 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 3053#endif 3054 } 3055 3056 return NOTIFY_OK; 3057} 3058 3059/* 3060 * /proc 3061 */ 3062 3063#ifdef CONFIG_PROC_FS 3064 3065static const struct file_operations ipv6_route_proc_fops = { 3066 .owner = THIS_MODULE, 3067 .open = ipv6_route_open, 3068 .read = seq_read, 3069 .llseek = seq_lseek, 3070 .release = seq_release_net, 3071}; 3072 3073static int rt6_stats_seq_show(struct seq_file *seq, void *v) 3074{ 3075 struct net *net = (struct net *)seq->private; 3076 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 3077 net->ipv6.rt6_stats->fib_nodes, 3078 net->ipv6.rt6_stats->fib_route_nodes, 3079 net->ipv6.rt6_stats->fib_rt_alloc, 3080 net->ipv6.rt6_stats->fib_rt_entries, 3081 net->ipv6.rt6_stats->fib_rt_cache, 3082 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 3083 net->ipv6.rt6_stats->fib_discarded_routes); 3084 3085 return 0; 3086} 3087 3088static int rt6_stats_seq_open(struct inode *inode, struct file *file) 3089{ 3090 return single_open_net(inode, file, rt6_stats_seq_show); 3091} 3092 3093static const struct file_operations rt6_stats_seq_fops = { 3094 .owner = THIS_MODULE, 3095 .open = rt6_stats_seq_open, 3096 .read = seq_read, 3097 .llseek = seq_lseek, 3098 .release = single_release_net, 3099}; 3100#endif /* CONFIG_PROC_FS */ 3101 3102#ifdef CONFIG_SYSCTL 3103 3104static 3105int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, 3106 void __user *buffer, size_t *lenp, loff_t *ppos) 3107{ 3108 struct net *net; 3109 int delay; 3110 if (!write) 3111 return -EINVAL; 3112 3113 net = (struct net *)ctl->extra1; 3114 delay = net->ipv6.sysctl.flush_delay; 3115 proc_dointvec(ctl, write, buffer, lenp, ppos); 3116 fib6_run_gc(delay <= 0 ? 
0 : (unsigned long)delay, net, delay > 0); 3117 return 0; 3118} 3119 3120struct ctl_table ipv6_route_table_template[] = { 3121 { 3122 .procname = "flush", 3123 .data = &init_net.ipv6.sysctl.flush_delay, 3124 .maxlen = sizeof(int), 3125 .mode = 0200, 3126 .proc_handler = ipv6_sysctl_rtcache_flush 3127 }, 3128 { 3129 .procname = "gc_thresh", 3130 .data = &ip6_dst_ops_template.gc_thresh, 3131 .maxlen = sizeof(int), 3132 .mode = 0644, 3133 .proc_handler = proc_dointvec, 3134 }, 3135 { 3136 .procname = "max_size", 3137 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 3138 .maxlen = sizeof(int), 3139 .mode = 0644, 3140 .proc_handler = proc_dointvec, 3141 }, 3142 { 3143 .procname = "gc_min_interval", 3144 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 3145 .maxlen = sizeof(int), 3146 .mode = 0644, 3147 .proc_handler = proc_dointvec_jiffies, 3148 }, 3149 { 3150 .procname = "gc_timeout", 3151 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 3152 .maxlen = sizeof(int), 3153 .mode = 0644, 3154 .proc_handler = proc_dointvec_jiffies, 3155 }, 3156 { 3157 .procname = "gc_interval", 3158 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 3159 .maxlen = sizeof(int), 3160 .mode = 0644, 3161 .proc_handler = proc_dointvec_jiffies, 3162 }, 3163 { 3164 .procname = "gc_elasticity", 3165 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 3166 .maxlen = sizeof(int), 3167 .mode = 0644, 3168 .proc_handler = proc_dointvec, 3169 }, 3170 { 3171 .procname = "mtu_expires", 3172 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 3173 .maxlen = sizeof(int), 3174 .mode = 0644, 3175 .proc_handler = proc_dointvec_jiffies, 3176 }, 3177 { 3178 .procname = "min_adv_mss", 3179 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 3180 .maxlen = sizeof(int), 3181 .mode = 0644, 3182 .proc_handler = proc_dointvec, 3183 }, 3184 { 3185 .procname = "gc_min_interval_ms", 3186 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 3187 .maxlen = sizeof(int), 3188 .mode = 0644, 3189 .proc_handler = proc_dointvec_ms_jiffies, 3190 }, 3191 { } 3192}; 3193 3194struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 3195{ 3196 struct ctl_table *table; 3197 3198 table = kmemdup(ipv6_route_table_template, 3199 sizeof(ipv6_route_table_template), 3200 GFP_KERNEL); 3201 3202 if (table) { 3203 table[0].data = &net->ipv6.sysctl.flush_delay; 3204 table[0].extra1 = net; 3205 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 3206 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 3207 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 3208 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 3209 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 3210 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 3211 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 3212 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 3213 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 3214 3215 /* Don't export sysctls to unprivileged users */ 3216 if (net->user_ns != &init_user_ns) 3217 table[0].procname = NULL; 3218 } 3219 3220 return table; 3221} 3222#endif 3223 3224static int __net_init ip6_route_net_init(struct net *net) 3225{ 3226 int ret = -ENOMEM; 3227 3228 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 3229 sizeof(net->ipv6.ip6_dst_ops)); 3230 3231 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 3232 goto out_ip6_dst_ops; 3233 3234 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 3235 sizeof(*net->ipv6.ip6_null_entry), 3236 GFP_KERNEL); 3237 if (!net->ipv6.ip6_null_entry) 3238 goto out_ip6_dst_entries; 
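	/* The per-netns entries below are kmemdup()'d from the file-level
	 * templates: point dst.path at the namespace's own copy, switch
	 * dst.ops to this namespace's ip6_dst_ops, and (re)initialize the
	 * metrics before the entries are used.
	 */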
3239 net->ipv6.ip6_null_entry->dst.path = 3240 (struct dst_entry *)net->ipv6.ip6_null_entry; 3241 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3242 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 3243 ip6_template_metrics, true); 3244 3245#ifdef CONFIG_IPV6_MULTIPLE_TABLES 3246 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 3247 sizeof(*net->ipv6.ip6_prohibit_entry), 3248 GFP_KERNEL); 3249 if (!net->ipv6.ip6_prohibit_entry) 3250 goto out_ip6_null_entry; 3251 net->ipv6.ip6_prohibit_entry->dst.path = 3252 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 3253 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3254 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 3255 ip6_template_metrics, true); 3256 3257 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 3258 sizeof(*net->ipv6.ip6_blk_hole_entry), 3259 GFP_KERNEL); 3260 if (!net->ipv6.ip6_blk_hole_entry) 3261 goto out_ip6_prohibit_entry; 3262 net->ipv6.ip6_blk_hole_entry->dst.path = 3263 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 3264 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 3265 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 3266 ip6_template_metrics, true); 3267#endif 3268 3269 net->ipv6.sysctl.flush_delay = 0; 3270 net->ipv6.sysctl.ip6_rt_max_size = 4096; 3271 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 3272 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 3273 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 3274 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 3275 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 3276 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 3277 3278 net->ipv6.ip6_rt_gc_expire = 30*HZ; 3279 3280 ret = 0; 3281out: 3282 return ret; 3283 3284#ifdef CONFIG_IPV6_MULTIPLE_TABLES 3285out_ip6_prohibit_entry: 3286 kfree(net->ipv6.ip6_prohibit_entry); 3287out_ip6_null_entry: 3288 kfree(net->ipv6.ip6_null_entry); 3289#endif 3290out_ip6_dst_entries: 3291 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 3292out_ip6_dst_ops: 3293 goto out; 3294} 3295 3296static void __net_exit ip6_route_net_exit(struct net *net) 3297{ 3298 kfree(net->ipv6.ip6_null_entry); 3299#ifdef CONFIG_IPV6_MULTIPLE_TABLES 3300 kfree(net->ipv6.ip6_prohibit_entry); 3301 kfree(net->ipv6.ip6_blk_hole_entry); 3302#endif 3303 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 3304} 3305 3306static int __net_init ip6_route_net_init_late(struct net *net) 3307{ 3308#ifdef CONFIG_PROC_FS 3309 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); 3310 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); 3311#endif 3312 return 0; 3313} 3314 3315static void __net_exit ip6_route_net_exit_late(struct net *net) 3316{ 3317#ifdef CONFIG_PROC_FS 3318 remove_proc_entry("ipv6_route", net->proc_net); 3319 remove_proc_entry("rt6_stats", net->proc_net); 3320#endif 3321} 3322 3323static struct pernet_operations ip6_route_net_ops = { 3324 .init = ip6_route_net_init, 3325 .exit = ip6_route_net_exit, 3326}; 3327 3328static int __net_init ipv6_inetpeer_init(struct net *net) 3329{ 3330 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 3331 3332 if (!bp) 3333 return -ENOMEM; 3334 inet_peer_base_init(bp); 3335 net->ipv6.peers = bp; 3336 return 0; 3337} 3338 3339static void __net_exit ipv6_inetpeer_exit(struct net *net) 3340{ 3341 struct inet_peer_base *bp = net->ipv6.peers; 3342 3343 net->ipv6.peers = NULL; 3344 inetpeer_invalidate_tree(bp); 3345 kfree(bp); 3346} 3347 3348static struct pernet_operations ipv6_inetpeer_ops = { 3349 .init = 
ipv6_inetpeer_init, 3350 .exit = ipv6_inetpeer_exit, 3351}; 3352 3353static struct pernet_operations ip6_route_net_late_ops = { 3354 .init = ip6_route_net_init_late, 3355 .exit = ip6_route_net_exit_late, 3356}; 3357 3358static struct notifier_block ip6_route_dev_notifier = { 3359 .notifier_call = ip6_route_dev_notify, 3360 .priority = 0, 3361}; 3362 3363int __init ip6_route_init(void) 3364{ 3365 int ret; 3366 3367 ret = -ENOMEM; 3368 ip6_dst_ops_template.kmem_cachep = 3369 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 3370 SLAB_HWCACHE_ALIGN, NULL); 3371 if (!ip6_dst_ops_template.kmem_cachep) 3372 goto out; 3373 3374 ret = dst_entries_init(&ip6_dst_blackhole_ops); 3375 if (ret) 3376 goto out_kmem_cache; 3377 3378 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 3379 if (ret) 3380 goto out_dst_entries; 3381 3382 ret = register_pernet_subsys(&ip6_route_net_ops); 3383 if (ret) 3384 goto out_register_inetpeer; 3385 3386 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3387 3388 /* Registering of the loopback is done before this portion of code, 3389 * the loopback reference in rt6_info will not be taken, do it 3390 * manually for init_net */ 3391 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 3392 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3393 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3394 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 3395 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3396 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 3397 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3398 #endif 3399 ret = fib6_init(); 3400 if (ret) 3401 goto out_register_subsys; 3402 3403 ret = xfrm6_init(); 3404 if (ret) 3405 goto out_fib6_init; 3406 3407 ret = fib6_rules_init(); 3408 if (ret) 3409 goto xfrm6_init; 3410 3411 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3412 if (ret) 3413 goto fib6_rules_init; 3414 3415 ret = -ENOBUFS; 3416 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3417 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3418 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3419 goto out_register_late_subsys; 3420 3421 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3422 if (ret) 3423 goto out_register_late_subsys; 3424 3425out: 3426 return ret; 3427 3428out_register_late_subsys: 3429 unregister_pernet_subsys(&ip6_route_net_late_ops); 3430fib6_rules_init: 3431 fib6_rules_cleanup(); 3432xfrm6_init: 3433 xfrm6_fini(); 3434out_fib6_init: 3435 fib6_gc_cleanup(); 3436out_register_subsys: 3437 unregister_pernet_subsys(&ip6_route_net_ops); 3438out_register_inetpeer: 3439 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3440out_dst_entries: 3441 dst_entries_destroy(&ip6_dst_blackhole_ops); 3442out_kmem_cache: 3443 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3444 goto out; 3445} 3446 3447void ip6_route_cleanup(void) 3448{ 3449 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3450 unregister_pernet_subsys(&ip6_route_net_late_ops); 3451 fib6_rules_cleanup(); 3452 xfrm6_fini(); 3453 fib6_gc_cleanup(); 3454 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3455 unregister_pernet_subsys(&ip6_route_net_ops); 3456 dst_entries_destroy(&ip6_dst_blackhole_ops); 3457 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3458} 3459
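/*
 * Editor's illustrative sketch (not part of the kernel source): userspace can
 * exercise the legacy ioctl path handled by ipv6_route_ioctl() above by
 * issuing SIOCADDRT on an AF_INET6 socket with a struct in6_rtmsg; the kernel
 * converts it via rtmsg_to_fib6_config() and calls ip6_route_add().  The
 * prefix, gateway and "eth0" below are placeholder values, and the sketch
 * assumes glibc's <net/route.h> exposes struct in6_rtmsg and the RTF_* flags.
 * CAP_NET_ADMIN is required, as checked in ipv6_route_ioctl().
 */
#include <arpa/inet.h>
#include <net/if.h>
#include <net/route.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

static int add_example_ipv6_route(void)
{
	struct in6_rtmsg rtmsg;
	int fd, err;

	memset(&rtmsg, 0, sizeof(rtmsg));

	/* 2001:db8:1::/64 via 2001:db8::1 (documentation addresses) */
	if (inet_pton(AF_INET6, "2001:db8:1::", &rtmsg.rtmsg_dst) != 1 ||
	    inet_pton(AF_INET6, "2001:db8::1", &rtmsg.rtmsg_gateway) != 1)
		return -1;
	rtmsg.rtmsg_dst_len = 64;
	rtmsg.rtmsg_flags = RTF_UP | RTF_GATEWAY;
	rtmsg.rtmsg_metric = 1024;			/* IP6_RT_PRIO_USER */
	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");	/* placeholder device */

	fd = socket(AF_INET6, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	/* Kernel side: ipv6_route_ioctl() -> rtmsg_to_fib6_config() -> ip6_route_add() */
	err = ioctl(fd, SIOCADDRT, &rtmsg);
	if (err < 0)
		perror("SIOCADDRT");

	close(fd);
	return err;
}
/*
 * Modern tooling (e.g. iproute2's "ip -6 route add") uses the netlink
 * RTM_NEWROUTE path handled by inet6_rtm_newroute() instead; the ioctl form
 * above is shown only because it keeps the example short.
 */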