1/*
2 *	Linux INET6 implementation
3 *	FIB front-end.
4 *
5 *	Authors:
6 *	Pedro Roque		<roque@di.fc.ul.pt>
7 *
8 *	This program is free software; you can redistribute it and/or
9 *      modify it under the terms of the GNU General Public License
10 *      as published by the Free Software Foundation; either version
11 *      2 of the License, or (at your option) any later version.
12 */
13
14/*	Changes:
15 *
16 *	YOSHIFUJI Hideaki @USAGI
17 *		reworked default router selection.
18 *		- respect outgoing interface
19 *		- select from (probably) reachable routers (i.e.
20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
21 *		- always select the same router if it is (probably)
22 *		reachable.  otherwise, round-robin the list.
23 *	Ville Nuorvala
24 *		Fixed routing subtrees.
25 */
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h>
30#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
40#include <linux/mroute6.h>
41#include <linux/init.h>
42#include <linux/if_arp.h>
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#include <linux/nsproxy.h>
46#include <linux/slab.h>
47#include <net/net_namespace.h>
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
58#include <net/netevent.h>
59#include <net/netlink.h>
60#include <net/nexthop.h>
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
/*
 * Result codes for the neighbour reachability check of a route
 * (see rt6_check_neigh()).  All failure codes are negative so that
 * rt6_score_route() can detect them with a "< 0" test; the only success
 * code is positive.
 */
68enum rt6_nud_state {
69	RT6_NUD_FAIL_HARD = -3,	/* find_match() skips the route entirely */
70	RT6_NUD_FAIL_PROBE = -2,
71	RT6_NUD_FAIL_DO_RR = -1,	/* find_match() scores it 0 and asks for round-robin */
72	RT6_NUD_SUCCEED = 1
73};
74
75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76				    const struct in6_addr *dest);
77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79static unsigned int	 ip6_mtu(const struct dst_entry *dst);
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void		ip6_dst_destroy(struct dst_entry *);
82static void		ip6_dst_ifdown(struct dst_entry *,
83				       struct net_device *dev, int how);
84static int		 ip6_dst_gc(struct dst_ops *ops);
85
86static int		ip6_pkt_discard(struct sk_buff *skb);
87static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
88static int		ip6_pkt_prohibit(struct sk_buff *skb);
89static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
90static void		ip6_link_failure(struct sk_buff *skb);
91static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92					   struct sk_buff *skb, u32 mtu);
93static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94					struct sk_buff *skb);
95static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
97#ifdef CONFIG_IPV6_ROUTE_INFO
98static struct rt6_info *rt6_add_route_info(struct net *net,
99					   const struct in6_addr *prefix, int prefixlen,
100					   const struct in6_addr *gwaddr, int ifindex,
101					   unsigned int pref);
102static struct rt6_info *rt6_get_route_info(struct net *net,
103					   const struct in6_addr *prefix, int prefixlen,
104					   const struct in6_addr *gwaddr, int ifindex);
105#endif
106
107static void rt6_bind_peer(struct rt6_info *rt, int create)
108{
109	struct inet_peer_base *base;
110	struct inet_peer *peer;
111
112	base = inetpeer_base_ptr(rt->_rt6i_peer);
113	if (!base)
114		return;
115
116	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117	if (peer) {
118		if (!rt6_set_peer(rt, peer))
119			inet_putpeer(peer);
120	}
121}
122
123static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124{
125	if (rt6_has_peer(rt))
126		return rt6_peer_ptr(rt);
127
128	rt6_bind_peer(rt, create);
129	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130}
131
/* Convenience wrapper: __rt6_get_peer() with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
/*
 * dst_ops->cow_metrics hook for IPv6 routes (wired up in
 * ip6_dst_ops_template): obtain a writable metrics array for @dst.
 *
 * @dst: route whose metrics are about to be modified
 * @old: the dst->_metrics value the caller observed (pointer plus flag bits)
 *
 * Returns the metrics array to write into, or NULL if none is available.
 */
137static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138{
139	struct rt6_info *rt = (struct rt6_info *) dst;
140	struct inet_peer *peer;
141	u32 *p = NULL;
142
	/* Routes without DST_HOST are handled by the generic helper. */
143	if (!(rt->dst.flags & DST_HOST))
144		return dst_cow_metrics_generic(dst, old);
145
	/* Host routes keep their writable metrics in the inet_peer. */
146	peer = rt6_get_peer_create(rt);
147	if (peer) {
148		u32 *old_p = __DST_METRICS_PTR(old);
149		unsigned long prev, new;
150
151		p = peer->metrics;
		/*
		 * Seed the peer's array from the old metrics when
		 * inet_metrics_new() says so or an overwrite is forced.
		 */
152		if (inet_metrics_new(peer) ||
153		    (old & DST_METRICS_FORCE_OVERWRITE))
154			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
		/* Install the peer array only if _metrics still equals @old. */
156		new = (unsigned long) p;
157		prev = cmpxchg(&dst->_metrics, old, new);
158
		/*
		 * _metrics changed under us: use whatever is installed now,
		 * unless it is marked read-only, in which case report failure.
		 */
159		if (prev != old) {
160			p = __DST_METRICS_PTR(prev);
161			if (prev & DST_METRICS_READ_ONLY)
162				p = NULL;
163		}
164	}
165	return p;
166}
167
168static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169					     struct sk_buff *skb,
170					     const void *daddr)
171{
172	struct in6_addr *p = &rt->rt6i_gateway;
173
174	if (!ipv6_addr_any(p))
175		return (const void *) p;
176	else if (skb)
177		return &ipv6_hdr(skb)->daddr;
178	return daddr;
179}
180
181static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182					  struct sk_buff *skb,
183					  const void *daddr)
184{
185	struct rt6_info *rt = (struct rt6_info *) dst;
186	struct neighbour *n;
187
188	daddr = choose_neigh_daddr(rt, skb, daddr);
189	n = __ipv6_neigh_lookup(dst->dev, daddr);
190	if (n)
191		return n;
192	return neigh_create(&nd_tbl, daddr, dst->dev);
193}
194
/*
 * dst_ops for regular IPv6 routes.  The name suggests this is a template
 * copied into each namespace's net->ipv6.ip6_dst_ops (the ops that
 * ip6_dst_alloc() allocates from) -- the copy site is not in this part
 * of the file, TODO confirm.
 */
195static struct dst_ops ip6_dst_ops_template = {
196	.family			=	AF_INET6,
197	.gc			=	ip6_dst_gc,
198	.gc_thresh		=	1024,
199	.check			=	ip6_dst_check,
200	.default_advmss		=	ip6_default_advmss,
201	.mtu			=	ip6_mtu,
202	.cow_metrics		=	ipv6_cow_metrics,
203	.destroy		=	ip6_dst_destroy,
204	.ifdown			=	ip6_dst_ifdown,
205	.negative_advice	=	ip6_negative_advice,
206	.link_failure		=	ip6_link_failure,
207	.update_pmtu		=	ip6_rt_update_pmtu,
208	.redirect		=	rt6_do_redirect,
209	.local_out		=	__ip6_local_out,
210	.neigh_lookup		=	ip6_neigh_lookup,
211};
212
213static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214{
215	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217	return mtu ? : dst->dev->mtu;
218}
219
/* dst_ops->update_pmtu hook for blackhole routes: intentionally a no-op. */
220static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221					 struct sk_buff *skb, u32 mtu)
222{
223}
224
/* dst_ops->redirect hook for blackhole routes: intentionally a no-op. */
225static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
226				      struct sk_buff *skb)
227{
228}
229
/*
 * dst_ops->cow_metrics hook for blackhole routes: always returns NULL,
 * i.e. never hands out a writable metrics array.
 */
230static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231					 unsigned long old)
232{
233	return NULL;
234}
235
/*
 * dst_ops used for the entries allocated by ip6_blackhole_route().
 * PMTU updates, redirects and metric writes go to the no-op handlers
 * above; destroy, check, advmss and neighbour lookup are shared with
 * regular IPv6 routes.
 */
236static struct dst_ops ip6_dst_blackhole_ops = {
237	.family			=	AF_INET6,
238	.destroy		=	ip6_dst_destroy,
239	.check			=	ip6_dst_check,
240	.mtu			=	ip6_blackhole_mtu,
241	.default_advmss		=	ip6_default_advmss,
242	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
243	.redirect		=	ip6_rt_blackhole_redirect,
244	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
245	.neigh_lookup		=	ip6_neigh_lookup,
246};
247
/*
 * Read-only metrics array with every entry zero; the hop-limit slot is
 * spelled out explicitly.  Its users are not in this part of the file.
 */
248static const u32 ip6_template_metrics[RTAX_MAX] = {
249	[RTAX_HOPLIMIT - 1] = 0,
250};
251
/*
 * Template for the "null" route entry: a reject route whose dst error is
 * -ENETUNREACH and whose input/output handlers are ip6_pkt_discard() and
 * ip6_pkt_discard_out().  The lookup code in this file returns
 * net->ipv6.ip6_null_entry when nothing matches; presumably that entry
 * is created from this template (copy site not visible here).
 */
252static const struct rt6_info ip6_null_entry_template = {
253	.dst = {
254		.__refcnt	= ATOMIC_INIT(1),
255		.__use		= 1,
256		.obsolete	= DST_OBSOLETE_FORCE_CHK,
257		.error		= -ENETUNREACH,
258		.input		= ip6_pkt_discard,
259		.output		= ip6_pkt_discard_out,
260	},
261	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
262	.rt6i_protocol  = RTPROT_KERNEL,
263	.rt6i_metric	= ~(u32) 0,	/* largest possible metric value */
264	.rt6i_ref	= ATOMIC_INIT(1),
265};
266
267#ifdef CONFIG_IPV6_MULTIPLE_TABLES
268
/*
 * Template for the "prohibit" reject route (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES): dst error -EACCES, packets handled by
 * ip6_pkt_prohibit() / ip6_pkt_prohibit_out().
 */
269static const struct rt6_info ip6_prohibit_entry_template = {
270	.dst = {
271		.__refcnt	= ATOMIC_INIT(1),
272		.__use		= 1,
273		.obsolete	= DST_OBSOLETE_FORCE_CHK,
274		.error		= -EACCES,
275		.input		= ip6_pkt_prohibit,
276		.output		= ip6_pkt_prohibit_out,
277	},
278	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
279	.rt6i_protocol  = RTPROT_KERNEL,
280	.rt6i_metric	= ~(u32) 0,
281	.rt6i_ref	= ATOMIC_INIT(1),
282};
283
/*
 * Template for the "blackhole" reject route (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES): dst error -EINVAL, packets handed to
 * dst_discard() / dst_discard_sk().
 */
284static const struct rt6_info ip6_blk_hole_entry_template = {
285	.dst = {
286		.__refcnt	= ATOMIC_INIT(1),
287		.__use		= 1,
288		.obsolete	= DST_OBSOLETE_FORCE_CHK,
289		.error		= -EINVAL,
290		.input		= dst_discard,
291		.output		= dst_discard_sk,
292	},
293	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
294	.rt6i_protocol  = RTPROT_KERNEL,
295	.rt6i_metric	= ~(u32) 0,
296	.rt6i_ref	= ATOMIC_INIT(1),
297};
298
299#endif
300
301/* allocate dst with ip6_dst_ops */
302static inline struct rt6_info *ip6_dst_alloc(struct net *net,
303					     struct net_device *dev,
304					     int flags,
305					     struct fib6_table *table)
306{
307	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
308					0, DST_OBSOLETE_FORCE_CHK, flags);
309
310	if (rt) {
311		struct dst_entry *dst = &rt->dst;
312
313		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
314		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
315		INIT_LIST_HEAD(&rt->rt6i_siblings);
316	}
317	return rt;
318}
319
320static void ip6_dst_destroy(struct dst_entry *dst)
321{
322	struct rt6_info *rt = (struct rt6_info *)dst;
323	struct inet6_dev *idev = rt->rt6i_idev;
324	struct dst_entry *from = dst->from;
325
326	if (!(rt->dst.flags & DST_HOST))
327		dst_destroy_metrics_generic(dst);
328
329	if (idev) {
330		rt->rt6i_idev = NULL;
331		in6_dev_put(idev);
332	}
333
334	dst->from = NULL;
335	dst_release(from);
336
337	if (rt6_has_peer(rt)) {
338		struct inet_peer *peer = rt6_peer_ptr(rt);
339		inet_putpeer(peer);
340	}
341}
342
343static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
344			   int how)
345{
346	struct rt6_info *rt = (struct rt6_info *)dst;
347	struct inet6_dev *idev = rt->rt6i_idev;
348	struct net_device *loopback_dev =
349		dev_net(dev)->loopback_dev;
350
351	if (dev != loopback_dev) {
352		if (idev && idev->dev == dev) {
353			struct inet6_dev *loopback_idev =
354				in6_dev_get(loopback_dev);
355			if (loopback_idev) {
356				rt->rt6i_idev = loopback_idev;
357				in6_dev_put(idev);
358			}
359		}
360	}
361}
362
363static bool rt6_check_expired(const struct rt6_info *rt)
364{
365	if (rt->rt6i_flags & RTF_EXPIRES) {
366		if (time_after(jiffies, rt->dst.expires))
367			return true;
368	} else if (rt->dst.from) {
369		return rt6_check_expired((struct rt6_info *) rt->dst.from);
370	}
371	return false;
372}
373
374/* Multipath route selection:
375 *   Hash based function using packet header and flowlabel.
376 * Adapted from fib_info_hashfn()
377 */
378static int rt6_info_hash_nhsfn(unsigned int candidate_count,
379			       const struct flowi6 *fl6)
380{
381	unsigned int val = fl6->flowi6_proto;
382
383	val ^= ipv6_addr_hash(&fl6->daddr);
384	val ^= ipv6_addr_hash(&fl6->saddr);
385
386	/* Work only if this not encapsulated */
387	switch (fl6->flowi6_proto) {
388	case IPPROTO_UDP:
389	case IPPROTO_TCP:
390	case IPPROTO_SCTP:
391		val ^= (__force u16)fl6->fl6_sport;
392		val ^= (__force u16)fl6->fl6_dport;
393		break;
394
395	case IPPROTO_ICMPV6:
396		val ^= (__force u16)fl6->fl6_icmp_type;
397		val ^= (__force u16)fl6->fl6_icmp_code;
398		break;
399	}
400	/* RFC6438 recommands to use flowlabel */
401	val ^= (__force u32)fl6->flowlabel;
402
403	/* Perhaps, we need to tune, this function? */
404	val = val ^ (val >> 7) ^ (val >> 12);
405	return val % candidate_count;
406}
407
408static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
409					     struct flowi6 *fl6, int oif,
410					     int strict)
411{
412	struct rt6_info *sibling, *next_sibling;
413	int route_choosen;
414
415	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
416	/* Don't change the route, if route_choosen == 0
417	 * (siblings does not include ourself)
418	 */
419	if (route_choosen)
420		list_for_each_entry_safe(sibling, next_sibling,
421				&match->rt6i_siblings, rt6i_siblings) {
422			route_choosen--;
423			if (route_choosen == 0) {
424				if (rt6_score_route(sibling, oif, strict) < 0)
425					break;
426				match = sibling;
427				break;
428			}
429		}
430	return match;
431}
432
433/*
434 *	Route lookup. Any table->tb6_lock is implied.
435 */
436
/*
 * Walk the route list starting at @rt and pick the entry that fits the
 * requested output interface @oif or, when no @oif is given, the source
 * address @saddr.
 *
 * Returns:
 *  - @rt unchanged when neither @oif nor a non-any @saddr is given;
 *  - the first route whose device has ifindex @oif;
 *  - failing that, a remembered loopback-device route ("local");
 *  - failing that, net->ipv6.ip6_null_entry if RT6_LOOKUP_F_IFACE is set
 *    and @oif is given;
 *  - otherwise @rt.
 */
437static inline struct rt6_info *rt6_device_match(struct net *net,
438						    struct rt6_info *rt,
439						    const struct in6_addr *saddr,
440						    int oif,
441						    int flags)
442{
443	struct rt6_info *local = NULL;
444	struct rt6_info *sprt;
445
	/* Nothing to match against: keep the head of the list. */
446	if (!oif && ipv6_addr_any(saddr))
447		goto out;
448
449	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
450		struct net_device *dev = sprt->dst.dev;
451
452		if (oif) {
			/* Exact interface match wins immediately. */
453			if (dev->ifindex == oif)
454				return sprt;
			/*
			 * Route on a loopback device: candidate fallback.
			 * If its inet6_dev is not on @oif it is skipped in
			 * strict mode, or when an earlier candidate whose
			 * inet6_dev is on @oif has already been remembered.
			 */
455			if (dev->flags & IFF_LOOPBACK) {
456				if (!sprt->rt6i_idev ||
457				    sprt->rt6i_idev->dev->ifindex != oif) {
458					if (flags & RT6_LOOKUP_F_IFACE && oif)
459						continue;
460					if (local && (!oif ||
461						      local->rt6i_idev->dev->ifindex == oif))
462						continue;
463				}
464				local = sprt;
465			}
466		} else {
			/* No oif: match on the source address instead. */
467			if (ipv6_chk_addr(net, saddr, dev,
468					  flags & RT6_LOOKUP_F_IFACE))
469				return sprt;
470		}
471	}
472
473	if (oif) {
474		if (local)
475			return local;
476
		/* Strict interface match requested and nothing fit. */
477		if (flags & RT6_LOOKUP_F_IFACE)
478			return net->ipv6.ip6_null_entry;
479	}
480out:
481	return rt;
482}
483
484#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router probe request, queued by rt6_probe() and consumed
 * (and freed) by rt6_probe_deferred().
 */
485struct __rt6_probe_work {
486	struct work_struct work;
487	struct in6_addr target;	/* gateway address to solicit */
488	struct net_device *dev;	/* held with dev_hold() until the work has run */
489};
490
491static void rt6_probe_deferred(struct work_struct *w)
492{
493	struct in6_addr mcaddr;
494	struct __rt6_probe_work *work =
495		container_of(w, struct __rt6_probe_work, work);
496
497	addrconf_addr_solict_mult(&work->target, &mcaddr);
498	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
499	dev_put(work->dev);
500	kfree(work);
501}
502
/*
 * Schedule a reachability probe for the gateway of @rt if one is due.
 *
 * Nothing is done for a NULL route, a route without RTF_GATEWAY, or a
 * gateway whose neighbour entry is in a NUD_VALID state.  Otherwise a
 * probe is queued when there is no neighbour entry at all, or when the
 * entry was last updated more than rtr_probe_interval ago.  The actual
 * solicitation is sent later from rt6_probe_deferred().
 */
503static void rt6_probe(struct rt6_info *rt)
504{
505	struct neighbour *neigh;
506	/*
507	 * Okay, this does not seem to be appropriate
508	 * for now, however, we need to check if it
509	 * is really so; aka Router Reachability Probing.
510	 *
511	 * Router Reachability Probe MUST be rate-limited
512	 * to no more than one per minute.
513	 */
514	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
515		return;
516	rcu_read_lock_bh();
517	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
518	if (neigh) {
		/* neigh->lock stays write-held until one of the unlocks below. */
519		write_lock(&neigh->lock);
520		if (neigh->nud_state & NUD_VALID)
521			goto out;
522	}
523
524	if (!neigh ||
525	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
526		struct __rt6_probe_work *work;
527
		/* Atomic allocation: we are inside rcu_read_lock_bh() and may hold neigh->lock. */
528		work = kmalloc(sizeof(*work), GFP_ATOMIC);
529
		/* Only mark the neighbour as probed if the probe can really be queued. */
530		if (neigh && work)
531			__neigh_set_probe_once(neigh);
532
533		if (neigh)
534			write_unlock(&neigh->lock);
535
536		if (work) {
537			INIT_WORK(&work->work, rt6_probe_deferred);
538			work->target = rt->rt6i_gateway;
			/* Reference is dropped by rt6_probe_deferred(). */
539			dev_hold(rt->dst.dev);
540			work->dev = rt->dst.dev;
541			schedule_work(&work->work);
542		}
543	} else {
		/*
		 * Reached either by falling into the else branch (neighbour
		 * exists, probe not due yet) or via "goto out" (neighbour is
		 * valid); in both cases neigh is non-NULL and locked.
		 */
544out:
545		write_unlock(&neigh->lock);
546	}
547	rcu_read_unlock_bh();
548}
549#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
550static inline void rt6_probe(struct rt6_info *rt)
551{
552}
553#endif
554
555/*
556 * Default Router Selection (RFC 2461 6.3.6)
557 */
558static inline int rt6_check_dev(struct rt6_info *rt, int oif)
559{
560	struct net_device *dev = rt->dst.dev;
561	if (!oif || dev->ifindex == oif)
562		return 2;
563	if ((dev->flags & IFF_LOOPBACK) &&
564	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
565		return 1;
566	return 0;
567}
568
/*
 * Classify the reachability of the gateway of @rt.
 *
 * Routes flagged RTF_NONEXTHOP or lacking RTF_GATEWAY always succeed.
 * Otherwise the result depends on the gateway's neighbour entry:
 *  - NUD_VALID state: RT6_NUD_SUCCEED
 *  - other state, with CONFIG_IPV6_ROUTER_PREF: RT6_NUD_SUCCEED unless
 *    the state is NUD_FAILED, which yields RT6_NUD_FAIL_PROBE
 *  - other state, without CONFIG_IPV6_ROUTER_PREF: RT6_NUD_FAIL_HARD
 *    (the initial value of ret is left untouched)
 *  - no neighbour entry: RT6_NUD_SUCCEED with CONFIG_IPV6_ROUTER_PREF,
 *    RT6_NUD_FAIL_DO_RR without it
 */
569static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
570{
571	struct neighbour *neigh;
572	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
573
574	if (rt->rt6i_flags & RTF_NONEXTHOP ||
575	    !(rt->rt6i_flags & RTF_GATEWAY))
576		return RT6_NUD_SUCCEED;
577
578	rcu_read_lock_bh();
579	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
580	if (neigh) {
581		read_lock(&neigh->lock);
582		if (neigh->nud_state & NUD_VALID)
583			ret = RT6_NUD_SUCCEED;
584#ifdef CONFIG_IPV6_ROUTER_PREF
		/* These branches continue the if above only when the option is enabled. */
585		else if (!(neigh->nud_state & NUD_FAILED))
586			ret = RT6_NUD_SUCCEED;
587		else
588			ret = RT6_NUD_FAIL_PROBE;
589#endif
590		read_unlock(&neigh->lock);
591	} else {
592		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
593		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
594	}
595	rcu_read_unlock_bh();
596
597	return ret;
598}
599
600static int rt6_score_route(struct rt6_info *rt, int oif,
601			   int strict)
602{
603	int m;
604
605	m = rt6_check_dev(rt, oif);
606	if (!m && (strict & RT6_LOOKUP_F_IFACE))
607		return RT6_NUD_FAIL_HARD;
608#ifdef CONFIG_IPV6_ROUTER_PREF
609	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
610#endif
611	if (strict & RT6_LOOKUP_F_REACHABLE) {
612		int n = rt6_check_neigh(rt);
613		if (n < 0)
614			return n;
615	}
616	return m;
617}
618
619static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
620				   int *mpri, struct rt6_info *match,
621				   bool *do_rr)
622{
623	int m;
624	bool match_do_rr = false;
625
626	if (rt6_check_expired(rt))
627		goto out;
628
629	m = rt6_score_route(rt, oif, strict);
630	if (m == RT6_NUD_FAIL_DO_RR) {
631		match_do_rr = true;
632		m = 0; /* lowest valid score */
633	} else if (m == RT6_NUD_FAIL_HARD) {
634		goto out;
635	}
636
637	if (strict & RT6_LOOKUP_F_REACHABLE)
638		rt6_probe(rt);
639
640	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
641	if (m > *mpri) {
642		*do_rr = match_do_rr;
643		*mpri = m;
644		match = rt;
645	}
646out:
647	return match;
648}
649
650static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
651				     struct rt6_info *rr_head,
652				     u32 metric, int oif, int strict,
653				     bool *do_rr)
654{
655	struct rt6_info *rt, *match;
656	int mpri = -1;
657
658	match = NULL;
659	for (rt = rr_head; rt && rt->rt6i_metric == metric;
660	     rt = rt->dst.rt6_next)
661		match = find_match(rt, oif, strict, &mpri, match, do_rr);
662	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
663	     rt = rt->dst.rt6_next)
664		match = find_match(rt, oif, strict, &mpri, match, do_rr);
665
666	return match;
667}
668
669static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
670{
671	struct rt6_info *match, *rt0;
672	struct net *net;
673	bool do_rr = false;
674
675	rt0 = fn->rr_ptr;
676	if (!rt0)
677		fn->rr_ptr = rt0 = fn->leaf;
678
679	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
680			     &do_rr);
681
682	if (do_rr) {
683		struct rt6_info *next = rt0->dst.rt6_next;
684
685		/* no entries matched; do round-robin */
686		if (!next || next->rt6i_metric != rt0->rt6i_metric)
687			next = fn->leaf;
688
689		if (next != rt0)
690			fn->rr_ptr = next;
691	}
692
693	net = dev_net(rt0->dst.dev);
694	return match ? match : net->ipv6.ip6_null_entry;
695}
696
697#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to associate with the route
 *
 * Adds, updates or (for a zero lifetime) deletes the matching route.
 * Returns 0 on success, -EINVAL if the option is malformed.
 */
698int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
699		  const struct in6_addr *gwaddr)
700{
701	struct net *net = dev_net(dev);
702	struct route_info *rinfo = (struct route_info *) opt;
703	struct in6_addr prefix_buf, *prefix;
704	unsigned int pref;
705	unsigned long lifetime;
706	struct rt6_info *rt;
707
708	if (len < sizeof(struct route_info)) {
709		return -EINVAL;
710	}
711
712	/* Sanity check for prefix_len and length */
	/*
	 * NOTE(review): rinfo->length looks like a count of 8-octet units
	 * (prefixes longer than 64 bits need length >= 2) -- confirm against
	 * the struct route_info definition.
	 */
713	if (rinfo->length > 3) {
714		return -EINVAL;
715	} else if (rinfo->prefix_len > 128) {
716		return -EINVAL;
717	} else if (rinfo->prefix_len > 64) {
718		if (rinfo->length < 2) {
719			return -EINVAL;
720		}
721	} else if (rinfo->prefix_len > 0) {
722		if (rinfo->length < 1) {
723			return -EINVAL;
724		}
725	}
726
727	pref = rinfo->route_pref;
728	if (pref == ICMPV6_ROUTER_PREF_INVALID)
729		return -EINVAL;
730
731	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
732
	/*
	 * With length 3 the prefix field is used in place; otherwise it is
	 * copied into prefix_buf truncated to prefix_len bits.
	 */
733	if (rinfo->length == 3)
734		prefix = (struct in6_addr *)rinfo->prefix;
735	else {
736		/* this function is safe */
737		ipv6_addr_prefix(&prefix_buf,
738				 (struct in6_addr *)rinfo->prefix,
739				 rinfo->prefix_len);
740		prefix = &prefix_buf;
741	}
742
	/* A zero-length prefix refers to the default route via this gateway. */
743	if (rinfo->prefix_len == 0)
744		rt = rt6_get_dflt_router(gwaddr, dev);
745	else
746		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
747					gwaddr, dev->ifindex);
748
	/* Zero lifetime: withdraw an existing route. */
749	if (rt && !lifetime) {
750		ip6_del_rt(rt);
751		rt = NULL;
752	}
753
	/* Create the route if it is missing, else refresh its preference bits. */
754	if (!rt && lifetime)
755		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
756					pref);
757	else if (rt)
758		rt->rt6i_flags = RTF_ROUTEINFO |
759				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
760
761	if (rt) {
762		if (!addrconf_finite_timeout(lifetime))
763			rt6_clean_expires(rt);
764		else
765			rt6_set_expires(rt, jiffies + HZ * lifetime);
766
767		ip6_rt_put(rt);
768	}
769	return 0;
770}
771#endif
772
773static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
774					struct in6_addr *saddr)
775{
776	struct fib6_node *pn;
777	while (1) {
778		if (fn->fn_flags & RTN_TL_ROOT)
779			return NULL;
780		pn = fn->parent;
781		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
782			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
783		else
784			fn = pn;
785		if (fn->fn_flags & RTN_RTINFO)
786			return fn;
787	}
788}
789
/*
 * Per-table lookup callback used by ip6_route_lookup() and rt6_lookup().
 *
 * Finds the FIB node for the flow, picks a route with rt6_device_match()
 * (and, when the route has siblings and no oif is set, with
 * rt6_multipath_select()), and backtracks up the tree while the result
 * is the null entry.  The returned route has been passed through
 * dst_use(); the whole lookup runs under the table read lock.
 */
790static struct rt6_info *ip6_pol_route_lookup(struct net *net,
791					     struct fib6_table *table,
792					     struct flowi6 *fl6, int flags)
793{
794	struct fib6_node *fn;
795	struct rt6_info *rt;
796
797	read_lock_bh(&table->tb6_lock);
798	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
799restart:
800	rt = fn->leaf;
801	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
802	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
803		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	/* No usable route at this node: retry at the next node up the tree. */
804	if (rt == net->ipv6.ip6_null_entry) {
805		fn = fib6_backtrack(fn, &fl6->saddr);
806		if (fn)
807			goto restart;
808	}
809	dst_use(&rt->dst, jiffies);
810	read_unlock_bh(&table->tb6_lock);
811	return rt;
812
813}
814
/*
 * Look up a route for @fl6 in namespace @net by running
 * ip6_pol_route_lookup() through fib6_rule_lookup().
 */
815struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
816				    int flags)
817{
818	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
819}
820EXPORT_SYMBOL_GPL(ip6_route_lookup);
821
822struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
823			    const struct in6_addr *saddr, int oif, int strict)
824{
825	struct flowi6 fl6 = {
826		.flowi6_oif = oif,
827		.daddr = *daddr,
828	};
829	struct dst_entry *dst;
830	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
831
832	if (saddr) {
833		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
834		flags |= RT6_LOOKUP_F_HAS_SADDR;
835	}
836
837	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
838	if (dst->error == 0)
839		return (struct rt6_info *) dst;
840
841	dst_release(dst);
842
843	return NULL;
844}
845EXPORT_SYMBOL(rt6_lookup);
846
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
852
853static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
854			struct mx6_config *mxc)
855{
856	int err;
857	struct fib6_table *table;
858
859	table = rt->rt6i_table;
860	write_lock_bh(&table->tb6_lock);
861	err = fib6_add(&table->tb6_root, rt, info, mxc);
862	write_unlock_bh(&table->tb6_lock);
863
864	return err;
865}
866
867int ip6_ins_rt(struct rt6_info *rt)
868{
869	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
870	struct mx6_config mxc = { .mx = NULL, };
871
872	return __ip6_ins_rt(rt, &info, &mxc);
873}
874
875static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
876				      const struct in6_addr *daddr,
877				      const struct in6_addr *saddr)
878{
879	struct rt6_info *rt;
880
881	/*
882	 *	Clone the route.
883	 */
884
885	rt = ip6_rt_copy(ort, daddr);
886
887	if (rt) {
888		if (ort->rt6i_dst.plen != 128 &&
889		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
890			rt->rt6i_flags |= RTF_ANYCAST;
891
892		rt->rt6i_flags |= RTF_CACHE;
893
894#ifdef CONFIG_IPV6_SUBTREES
895		if (rt->rt6i_src.plen && saddr) {
896			rt->rt6i_src.addr = *saddr;
897			rt->rt6i_src.plen = 128;
898		}
899#endif
900	}
901
902	return rt;
903}
904
905static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
906					const struct in6_addr *daddr)
907{
908	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
909
910	if (rt)
911		rt->rt6i_flags |= RTF_CACHE;
912	return rt;
913}
914
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * @oif:   interface index to match (the callers pass the flow's iif for
 *         input and oif for output)
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is taken over
 *
 * Selects a route with rt6_select(), backtracking up the tree and, as a
 * last resort, retrying without the reachability requirement.  Unless
 * the selected route is already RTF_CACHE (or is a host route with a
 * gateway/no-nexthop flag), a per-destination clone is created and
 * inserted into the table; if the insertion fails the lookup is redone,
 * up to three attempts in total.
 *
 * Returns a route with a reference held; this is the null entry when
 * nothing matched or the clone could not be allocated.
 */
915static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
916				      struct flowi6 *fl6, int flags)
917{
918	struct fib6_node *fn, *saved_fn;
919	struct rt6_info *rt, *nrt;
920	int strict = 0;
921	int attempts = 3;
922	int err;
923
924	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* Require reachable next hops while forwarding is disabled. */
925	if (net->ipv6.devconf_all->forwarding == 0)
926		strict |= RT6_LOOKUP_F_REACHABLE;
927
928redo_fib6_lookup_lock:
929	read_lock_bh(&table->tb6_lock);
930
931	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* Remembered so the search can restart here without REACHABLE. */
932	saved_fn = fn;
933
934redo_rt6_select:
935	rt = rt6_select(fn, oif, strict);
936	if (rt->rt6i_nsiblings)
937		rt = rt6_multipath_select(rt, fl6, oif, strict);
938	if (rt == net->ipv6.ip6_null_entry) {
939		fn = fib6_backtrack(fn, &fl6->saddr);
940		if (fn)
941			goto redo_rt6_select;
942		else if (strict & RT6_LOOKUP_F_REACHABLE) {
943			/* also consider unreachable route */
944			strict &= ~RT6_LOOKUP_F_REACHABLE;
945			fn = saved_fn;
946			goto redo_rt6_select;
947		} else {
			/* Nothing at all matched: hand back the null entry. */
948			dst_hold(&rt->dst);
949			read_unlock_bh(&table->tb6_lock);
950			goto out2;
951		}
952	}
953
954	dst_hold(&rt->dst);
955	read_unlock_bh(&table->tb6_lock);
956
	/* Already a cache entry: use it as is. */
957	if (rt->rt6i_flags & RTF_CACHE)
958		goto out2;
959
	/*
	 * Routes with neither RTF_NONEXTHOP nor RTF_GATEWAY are cloned with
	 * rt6_alloc_cow(); other non-host routes with rt6_alloc_clone();
	 * remaining (host) routes are returned directly.
	 */
960	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
961		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
962	else if (!(rt->dst.flags & DST_HOST))
963		nrt = rt6_alloc_clone(rt, &fl6->daddr);
964	else
965		goto out2;
966
	/* Swap the original for the clone, or the null entry if allocation failed. */
967	ip6_rt_put(rt);
968	rt = nrt ? : net->ipv6.ip6_null_entry;
969
	/* This reference is the one returned to the caller (dropped again on retry). */
970	dst_hold(&rt->dst);
971	if (nrt) {
972		err = ip6_ins_rt(nrt);
973		if (!err)
974			goto out2;
975	}
976
977	if (--attempts <= 0)
978		goto out2;
979
980	/*
981	 * Race condition! In the gap, when table->tb6_lock was
982	 * released someone could insert this route.  Relookup.
983	 */
984	ip6_rt_put(rt);
985	goto redo_fib6_lookup_lock;
986
987out2:
988	rt->dst.lastuse = jiffies;
989	rt->dst.__use++;
990
991	return rt;
992}
993
994static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
995					    struct flowi6 *fl6, int flags)
996{
997	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
998}
999
1000static struct dst_entry *ip6_route_input_lookup(struct net *net,
1001						struct net_device *dev,
1002						struct flowi6 *fl6, int flags)
1003{
1004	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1005		flags |= RT6_LOOKUP_F_IFACE;
1006
1007	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1008}
1009
1010void ip6_route_input(struct sk_buff *skb)
1011{
1012	const struct ipv6hdr *iph = ipv6_hdr(skb);
1013	struct net *net = dev_net(skb->dev);
1014	int flags = RT6_LOOKUP_F_HAS_SADDR;
1015	struct flowi6 fl6 = {
1016		.flowi6_iif = skb->dev->ifindex,
1017		.daddr = iph->daddr,
1018		.saddr = iph->saddr,
1019		.flowlabel = ip6_flowinfo(iph),
1020		.flowi6_mark = skb->mark,
1021		.flowi6_proto = iph->nexthdr,
1022	};
1023
1024	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1025}
1026
1027static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1028					     struct flowi6 *fl6, int flags)
1029{
1030	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1031}
1032
1033struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1034					 struct flowi6 *fl6, int flags)
1035{
1036	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1037
1038	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1039		flags |= RT6_LOOKUP_F_IFACE;
1040
1041	if (!ipv6_addr_any(&fl6->saddr))
1042		flags |= RT6_LOOKUP_F_HAS_SADDR;
1043	else if (sk)
1044		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1045
1046	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1047}
1048EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1049
/*
 * Build a blackhole copy of @dst_orig: a new entry using
 * ip6_dst_blackhole_ops that discards all traffic but carries over the
 * original's metrics, inet6_dev, gateway, flags and destination (and,
 * with CONFIG_IPV6_SUBTREES, source) keys.
 *
 * The reference on @dst_orig is always released.
 * Returns the new dst, or ERR_PTR(-ENOMEM) if allocation failed.
 */
1050struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1051{
1052	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1053	struct dst_entry *new = NULL;
1054
1055	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1056	if (rt) {
1057		new = &rt->dst;
1058
		/* Zero the rt6_info part that follows the embedded dst_entry. */
1059		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1060		rt6_init_peer(rt, net->ipv6.peers);
1061
1062		new->__use = 1;
1063		new->input = dst_discard;
1064		new->output = dst_discard_sk;
1065
		/* Share read-only metrics as is; otherwise copy them. */
1066		if (dst_metrics_read_only(&ort->dst))
1067			new->_metrics = ort->dst._metrics;
1068		else
1069			dst_copy_metrics(new, &ort->dst);
1070		rt->rt6i_idev = ort->rt6i_idev;
1071		if (rt->rt6i_idev)
1072			in6_dev_hold(rt->rt6i_idev);
1073
1074		rt->rt6i_gateway = ort->rt6i_gateway;
1075		rt->rt6i_flags = ort->rt6i_flags;
1076		rt->rt6i_metric = 0;
1077
1078		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1079#ifdef CONFIG_IPV6_SUBTREES
1080		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1081#endif
1082
		/*
		 * NOTE(review): dst_free() is called on an entry that is then
		 * returned to the caller; presumably it only arranges for the
		 * entry to be freed once the reference obtained from
		 * dst_alloc() is dropped -- confirm against dst_free().
		 */
1083		dst_free(new);
1084	}
1085
1086	dst_release(dst_orig);
1087	return new ? new : ERR_PTR(-ENOMEM);
1088}
1089
1090/*
1091 *	Destination cache support functions
1092 */
1093
1094static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1095{
1096	struct rt6_info *rt;
1097
1098	rt = (struct rt6_info *) dst;
1099
1100	/* All IPV6 dsts are created with ->obsolete set to the value
1101	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1102	 * into this function always.
1103	 */
1104	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1105		return NULL;
1106
1107	if (rt6_check_expired(rt))
1108		return NULL;
1109
1110	return dst;
1111}
1112
1113static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1114{
1115	struct rt6_info *rt = (struct rt6_info *) dst;
1116
1117	if (rt) {
1118		if (rt->rt6i_flags & RTF_CACHE) {
1119			if (rt6_check_expired(rt)) {
1120				ip6_del_rt(rt);
1121				dst = NULL;
1122			}
1123		} else {
1124			dst_release(dst);
1125			dst = NULL;
1126		}
1127	}
1128	return dst;
1129}
1130
1131static void ip6_link_failure(struct sk_buff *skb)
1132{
1133	struct rt6_info *rt;
1134
1135	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1136
1137	rt = (struct rt6_info *) skb_dst(skb);
1138	if (rt) {
1139		if (rt->rt6i_flags & RTF_CACHE) {
1140			dst_hold(&rt->dst);
1141			if (ip6_del_rt(rt))
1142				dst_free(&rt->dst);
1143		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1144			rt->rt6i_node->fn_sernum = -1;
1145		}
1146	}
1147}
1148
1149static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1150			       struct sk_buff *skb, u32 mtu)
1151{
1152	struct rt6_info *rt6 = (struct rt6_info *)dst;
1153
1154	dst_confirm(dst);
1155	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1156		struct net *net = dev_net(dst->dev);
1157
1158		rt6->rt6i_flags |= RTF_MODIFIED;
1159		if (mtu < IPV6_MIN_MTU)
1160			mtu = IPV6_MIN_MTU;
1161
1162		dst_metric_set(dst, RTAX_MTU, mtu);
1163		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1164	}
1165}
1166
1167void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1168		     int oif, u32 mark)
1169{
1170	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1171	struct dst_entry *dst;
1172	struct flowi6 fl6;
1173
1174	memset(&fl6, 0, sizeof(fl6));
1175	fl6.flowi6_oif = oif;
1176	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1177	fl6.daddr = iph->daddr;
1178	fl6.saddr = iph->saddr;
1179	fl6.flowlabel = ip6_flowinfo(iph);
1180
1181	dst = ip6_route_output(net, NULL, &fl6);
1182	if (!dst->error)
1183		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1184	dst_release(dst);
1185}
1186EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1187
/*
 * Socket flavour of ip6_update_pmtu(): the namespace, bound device index
 * and mark are all taken from @sk.  @mtu is in network byte order.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1194
/*
 * Handle redirects
 *
 * Flow key extended with the address of the router that sent the redirect.
 * fl6 must stay the first member: __ip6_route_redirect() receives a pointer
 * to fl6 and casts it back to struct ip6rd_flowi to reach gateway.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* regular flow key used for the lookup */
	struct in6_addr gateway;	/* source address of the redirect */
};
1200
1201static struct rt6_info *__ip6_route_redirect(struct net *net,
1202					     struct fib6_table *table,
1203					     struct flowi6 *fl6,
1204					     int flags)
1205{
1206	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1207	struct rt6_info *rt;
1208	struct fib6_node *fn;
1209
1210	/* Get the "current" route for this destination and
1211	 * check if the redirect has come from approriate router.
1212	 *
1213	 * RFC 4861 specifies that redirects should only be
1214	 * accepted if they come from the nexthop to the target.
1215	 * Due to the way the routes are chosen, this notion
1216	 * is a bit fuzzy and one might need to check all possible
1217	 * routes.
1218	 */
1219
1220	read_lock_bh(&table->tb6_lock);
1221	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1222restart:
1223	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1224		if (rt6_check_expired(rt))
1225			continue;
1226		if (rt->dst.error)
1227			break;
1228		if (!(rt->rt6i_flags & RTF_GATEWAY))
1229			continue;
1230		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1231			continue;
1232		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1233			continue;
1234		break;
1235	}
1236
1237	if (!rt)
1238		rt = net->ipv6.ip6_null_entry;
1239	else if (rt->dst.error) {
1240		rt = net->ipv6.ip6_null_entry;
1241		goto out;
1242	}
1243
1244	if (rt == net->ipv6.ip6_null_entry) {
1245		fn = fib6_backtrack(fn, &fl6->saddr);
1246		if (fn)
1247			goto restart;
1248	}
1249
1250out:
1251	dst_hold(&rt->dst);
1252
1253	read_unlock_bh(&table->tb6_lock);
1254
1255	return rt;
1256};
1257
1258static struct dst_entry *ip6_route_redirect(struct net *net,
1259					const struct flowi6 *fl6,
1260					const struct in6_addr *gateway)
1261{
1262	int flags = RT6_LOOKUP_F_HAS_SADDR;
1263	struct ip6rd_flowi rdfl;
1264
1265	rdfl.fl6 = *fl6;
1266	rdfl.gateway = *gateway;
1267
1268	return fib6_rule_lookup(net, &rdfl.fl6,
1269				flags, __ip6_route_redirect);
1270}
1271
1272void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1273{
1274	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1275	struct dst_entry *dst;
1276	struct flowi6 fl6;
1277
1278	memset(&fl6, 0, sizeof(fl6));
1279	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1280	fl6.flowi6_oif = oif;
1281	fl6.flowi6_mark = mark;
1282	fl6.daddr = iph->daddr;
1283	fl6.saddr = iph->saddr;
1284	fl6.flowlabel = ip6_flowinfo(iph);
1285
1286	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1287	rt6_do_redirect(dst, NULL, skb);
1288	dst_release(dst);
1289}
1290EXPORT_SYMBOL_GPL(ip6_redirect);
1291
1292void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1293			    u32 mark)
1294{
1295	const struct ipv6hdr *iph = ipv6_hdr(skb);
1296	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1297	struct dst_entry *dst;
1298	struct flowi6 fl6;
1299
1300	memset(&fl6, 0, sizeof(fl6));
1301	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1302	fl6.flowi6_oif = oif;
1303	fl6.flowi6_mark = mark;
1304	fl6.daddr = msg->dest;
1305	fl6.saddr = iph->daddr;
1306
1307	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1308	rt6_do_redirect(dst, NULL, skb);
1309	dst_release(dst);
1310}
1311
/*
 * Socket flavour of ip6_redirect(): the namespace, bound device index and
 * mark are all taken from @sk.
 */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1317
1318static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1319{
1320	struct net_device *dev = dst->dev;
1321	unsigned int mtu = dst_mtu(dst);
1322	struct net *net = dev_net(dev);
1323
1324	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1325
1326	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1327		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1328
1329	/*
1330	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1331	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1332	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1333	 * rely only on pmtu discovery"
1334	 */
1335	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1336		mtu = IPV6_MAXPLEN;
1337	return mtu;
1338}
1339
1340static unsigned int ip6_mtu(const struct dst_entry *dst)
1341{
1342	struct inet6_dev *idev;
1343	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1344
1345	if (mtu)
1346		goto out;
1347
1348	mtu = IPV6_MIN_MTU;
1349
1350	rcu_read_lock();
1351	idev = __in6_dev_get(dst->dev);
1352	if (idev)
1353		mtu = idev->cnf.mtu6;
1354	rcu_read_unlock();
1355
1356out:
1357	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1358}
1359
/*
 * List of dst entries created by icmp6_dst_alloc(), chained through
 * dst->next.  icmp6_dst_lock is held (BH-safe) around every walk or
 * modification of the list in this file.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1362
1363struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1364				  struct flowi6 *fl6)
1365{
1366	struct dst_entry *dst;
1367	struct rt6_info *rt;
1368	struct inet6_dev *idev = in6_dev_get(dev);
1369	struct net *net = dev_net(dev);
1370
1371	if (unlikely(!idev))
1372		return ERR_PTR(-ENODEV);
1373
1374	rt = ip6_dst_alloc(net, dev, 0, NULL);
1375	if (unlikely(!rt)) {
1376		in6_dev_put(idev);
1377		dst = ERR_PTR(-ENOMEM);
1378		goto out;
1379	}
1380
1381	rt->dst.flags |= DST_HOST;
1382	rt->dst.output  = ip6_output;
1383	atomic_set(&rt->dst.__refcnt, 1);
1384	rt->rt6i_gateway  = fl6->daddr;
1385	rt->rt6i_dst.addr = fl6->daddr;
1386	rt->rt6i_dst.plen = 128;
1387	rt->rt6i_idev     = idev;
1388	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1389
1390	spin_lock_bh(&icmp6_dst_lock);
1391	rt->dst.next = icmp6_dst_gc_list;
1392	icmp6_dst_gc_list = &rt->dst;
1393	spin_unlock_bh(&icmp6_dst_lock);
1394
1395	fib6_force_start_gc(net);
1396
1397	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1398
1399out:
1400	return dst;
1401}
1402
1403int icmp6_dst_gc(void)
1404{
1405	struct dst_entry *dst, **pprev;
1406	int more = 0;
1407
1408	spin_lock_bh(&icmp6_dst_lock);
1409	pprev = &icmp6_dst_gc_list;
1410
1411	while ((dst = *pprev) != NULL) {
1412		if (!atomic_read(&dst->__refcnt)) {
1413			*pprev = dst->next;
1414			dst_free(dst);
1415		} else {
1416			pprev = &dst->next;
1417			++more;
1418		}
1419	}
1420
1421	spin_unlock_bh(&icmp6_dst_lock);
1422
1423	return more;
1424}
1425
1426static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1427			    void *arg)
1428{
1429	struct dst_entry *dst, **pprev;
1430
1431	spin_lock_bh(&icmp6_dst_lock);
1432	pprev = &icmp6_dst_gc_list;
1433	while ((dst = *pprev) != NULL) {
1434		struct rt6_info *rt = (struct rt6_info *) dst;
1435		if (func(rt, arg)) {
1436			*pprev = dst->next;
1437			dst_free(dst);
1438		} else {
1439			pprev = &dst->next;
1440		}
1441	}
1442	spin_unlock_bh(&icmp6_dst_lock);
1443}
1444
1445static int ip6_dst_gc(struct dst_ops *ops)
1446{
1447	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1448	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1449	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1450	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1451	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1452	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1453	int entries;
1454
1455	entries = dst_entries_get_fast(ops);
1456	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1457	    entries <= rt_max_size)
1458		goto out;
1459
1460	net->ipv6.ip6_rt_gc_expire++;
1461	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1462	entries = dst_entries_get_slow(ops);
1463	if (entries < ops->gc_thresh)
1464		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1465out:
1466	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1467	return entries > rt_max_size;
1468}
1469
1470static int ip6_convert_metrics(struct mx6_config *mxc,
1471			       const struct fib6_config *cfg)
1472{
1473	struct nlattr *nla;
1474	int remaining;
1475	u32 *mp;
1476
1477	if (!cfg->fc_mx)
1478		return 0;
1479
1480	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1481	if (unlikely(!mp))
1482		return -ENOMEM;
1483
1484	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1485		int type = nla_type(nla);
1486
1487		if (type) {
1488			u32 val;
1489
1490			if (unlikely(type > RTAX_MAX))
1491				goto err;
1492			if (type == RTAX_CC_ALGO) {
1493				char tmp[TCP_CA_NAME_MAX];
1494
1495				nla_strlcpy(tmp, nla, sizeof(tmp));
1496				val = tcp_ca_get_key_by_name(tmp);
1497				if (val == TCP_CA_UNSPEC)
1498					goto err;
1499			} else {
1500				val = nla_get_u32(nla);
1501			}
1502
1503			mp[type - 1] = val;
1504			__set_bit(type - 1, mxc->mx_valid);
1505		}
1506	}
1507
1508	mxc->mx = mp;
1509
1510	return 0;
1511 err:
1512	kfree(mp);
1513	return -EINVAL;
1514}
1515
/*
 * Build (but do not insert) a route from @cfg.
 *
 * On success returns 0 and stores the new route in *@rt_ret; the route
 * then owns the device and inet6_dev references taken here, and
 * cfg->fc_nlinfo.nl_net has been set to the namespace of the route's
 * device.  On failure returns a negative errno, stores NULL in *@rt_ret
 * (except for the early -EINVAL returns, which leave it untouched) and
 * drops every reference taken so far.
 *
 * Side effects on @cfg: a zero fc_metric becomes IP6_RT_PRIO_USER and an
 * RTPROT_UNSPEC fc_protocol becomes RTPROT_BOOT.
 */
int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/*
	 * Without NLM_F_CREATE only an existing table is looked up first;
	 * a missing table is still created, after a warning.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	/* The dst is allocated without a device; it is attached at the end. */
	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type / local flag. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128) {
		rt->dst.flags |= DST_HOST;
		dst_metrics_set_force_overwrite(&rt->dst);
	}

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* The route type selects the error code and drop handlers. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_sk;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		rt->rt6i_gateway = *gw_addr;
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must be reachable via a route. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: inherit it from that route. */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* Accept only if that route is not itself via a gateway. */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* A preferred source must pass ipv6_chk_addr() for this device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* From here on the route owns the dev and idev references. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	*rt_ret = rt;

	return 0;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	*rt_ret = NULL;

	return err;
}
1734
1735int ip6_route_add(struct fib6_config *cfg)
1736{
1737	struct mx6_config mxc = { .mx = NULL, };
1738	struct rt6_info *rt = NULL;
1739	int err;
1740
1741	err = ip6_route_info_create(cfg, &rt);
1742	if (err)
1743		goto out;
1744
1745	err = ip6_convert_metrics(&mxc, cfg);
1746	if (err)
1747		goto out;
1748
1749	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1750
1751	kfree(mxc.mx);
1752
1753	return err;
1754out:
1755	if (rt)
1756		dst_free(&rt->dst);
1757
1758	return err;
1759}
1760
1761static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1762{
1763	int err;
1764	struct fib6_table *table;
1765	struct net *net = dev_net(rt->dst.dev);
1766
1767	if (rt == net->ipv6.ip6_null_entry) {
1768		err = -ENOENT;
1769		goto out;
1770	}
1771
1772	table = rt->rt6i_table;
1773	write_lock_bh(&table->tb6_lock);
1774	err = fib6_del(rt, info);
1775	write_unlock_bh(&table->tb6_lock);
1776
1777out:
1778	ip6_rt_put(rt);
1779	return err;
1780}
1781
/*
 * Delete @rt using a default nl_info that carries only the namespace of
 * the route's device.  Like __ip6_del_rt(), this drops one reference on
 * @rt.  Returns the result of __ip6_del_rt().
 */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}
1789
1790static int ip6_route_del(struct fib6_config *cfg)
1791{
1792	struct fib6_table *table;
1793	struct fib6_node *fn;
1794	struct rt6_info *rt;
1795	int err = -ESRCH;
1796
1797	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1798	if (!table)
1799		return err;
1800
1801	read_lock_bh(&table->tb6_lock);
1802
1803	fn = fib6_locate(&table->tb6_root,
1804			 &cfg->fc_dst, cfg->fc_dst_len,
1805			 &cfg->fc_src, cfg->fc_src_len);
1806
1807	if (fn) {
1808		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1809			if (cfg->fc_ifindex &&
1810			    (!rt->dst.dev ||
1811			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1812				continue;
1813			if (cfg->fc_flags & RTF_GATEWAY &&
1814			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1815				continue;
1816			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1817				continue;
1818			dst_hold(&rt->dst);
1819			read_unlock_bh(&table->tb6_lock);
1820
1821			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1822		}
1823	}
1824	read_unlock_bh(&table->tb6_lock);
1825
1826	return err;
1827}
1828
/*
 * Validate an ICMPv6 redirect carried in @skb and, if it is acceptable,
 * install a host cache route for the redirected destination.
 *
 * @dst is the route the redirect was matched against; the namespace's
 * ip6_null_entry means the sender is not a valid next hop and the
 * redirect is ignored.  @sk is unused.  Every rejection is silent apart
 * from a rate-limited debug message.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	/* Bytes of ND options that follow the fixed redirect header. */
	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/*
	 * target == dest means the destination is on-link; otherwise the
	 * target must be a link-local unicast address.
	 */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Ignored when forwarding is on or accept_redirects is off. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	/* The target link-layer address option is optional. */
	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Reference is released at the out: label. */
	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* Clone the matched route as a /128 entry for the destination. */
	nrt = ip6_rt_copy(rt, &msg->dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* An old cache entry is superseded; ip6_del_rt() drops the clone ref. */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
1945
1946/*
1947 *	Misc support functions
1948 */
1949
1950static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1951				    const struct in6_addr *dest)
1952{
1953	struct net *net = dev_net(ort->dst.dev);
1954	struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1955					    ort->rt6i_table);
1956
1957	if (rt) {
1958		rt->dst.input = ort->dst.input;
1959		rt->dst.output = ort->dst.output;
1960		rt->dst.flags |= DST_HOST;
1961
1962		rt->rt6i_dst.addr = *dest;
1963		rt->rt6i_dst.plen = 128;
1964		dst_copy_metrics(&rt->dst, &ort->dst);
1965		rt->dst.error = ort->dst.error;
1966		rt->rt6i_idev = ort->rt6i_idev;
1967		if (rt->rt6i_idev)
1968			in6_dev_hold(rt->rt6i_idev);
1969		rt->dst.lastuse = jiffies;
1970
1971		if (ort->rt6i_flags & RTF_GATEWAY)
1972			rt->rt6i_gateway = ort->rt6i_gateway;
1973		else
1974			rt->rt6i_gateway = *dest;
1975		rt->rt6i_flags = ort->rt6i_flags;
1976		rt6_set_from(rt, ort);
1977		rt->rt6i_metric = 0;
1978
1979#ifdef CONFIG_IPV6_SUBTREES
1980		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1981#endif
1982		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1983		rt->rt6i_table = ort->rt6i_table;
1984	}
1985	return rt;
1986}
1987
1988#ifdef CONFIG_IPV6_ROUTE_INFO
1989static struct rt6_info *rt6_get_route_info(struct net *net,
1990					   const struct in6_addr *prefix, int prefixlen,
1991					   const struct in6_addr *gwaddr, int ifindex)
1992{
1993	struct fib6_node *fn;
1994	struct rt6_info *rt = NULL;
1995	struct fib6_table *table;
1996
1997	table = fib6_get_table(net, RT6_TABLE_INFO);
1998	if (!table)
1999		return NULL;
2000
2001	read_lock_bh(&table->tb6_lock);
2002	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2003	if (!fn)
2004		goto out;
2005
2006	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2007		if (rt->dst.dev->ifindex != ifindex)
2008			continue;
2009		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2010			continue;
2011		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2012			continue;
2013		dst_hold(&rt->dst);
2014		break;
2015	}
2016out:
2017	read_unlock_bh(&table->tb6_lock);
2018	return rt;
2019}
2020
2021static struct rt6_info *rt6_add_route_info(struct net *net,
2022					   const struct in6_addr *prefix, int prefixlen,
2023					   const struct in6_addr *gwaddr, int ifindex,
2024					   unsigned int pref)
2025{
2026	struct fib6_config cfg = {
2027		.fc_table	= RT6_TABLE_INFO,
2028		.fc_metric	= IP6_RT_PRIO_USER,
2029		.fc_ifindex	= ifindex,
2030		.fc_dst_len	= prefixlen,
2031		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2032				  RTF_UP | RTF_PREF(pref),
2033		.fc_nlinfo.portid = 0,
2034		.fc_nlinfo.nlh = NULL,
2035		.fc_nlinfo.nl_net = net,
2036	};
2037
2038	cfg.fc_dst = *prefix;
2039	cfg.fc_gateway = *gwaddr;
2040
2041	/* We should treat it as a default route if prefix length is 0. */
2042	if (!prefixlen)
2043		cfg.fc_flags |= RTF_DEFAULT;
2044
2045	ip6_route_add(&cfg);
2046
2047	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2048}
2049#endif
2050
2051struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2052{
2053	struct rt6_info *rt;
2054	struct fib6_table *table;
2055
2056	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2057	if (!table)
2058		return NULL;
2059
2060	read_lock_bh(&table->tb6_lock);
2061	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2062		if (dev == rt->dst.dev &&
2063		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2064		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2065			break;
2066	}
2067	if (rt)
2068		dst_hold(&rt->dst);
2069	read_unlock_bh(&table->tb6_lock);
2070	return rt;
2071}
2072
2073struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2074				     struct net_device *dev,
2075				     unsigned int pref)
2076{
2077	struct fib6_config cfg = {
2078		.fc_table	= RT6_TABLE_DFLT,
2079		.fc_metric	= IP6_RT_PRIO_USER,
2080		.fc_ifindex	= dev->ifindex,
2081		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2082				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2083		.fc_nlinfo.portid = 0,
2084		.fc_nlinfo.nlh = NULL,
2085		.fc_nlinfo.nl_net = dev_net(dev),
2086	};
2087
2088	cfg.fc_gateway = *gwaddr;
2089
2090	ip6_route_add(&cfg);
2091
2092	return rt6_get_dflt_router(gwaddr, dev);
2093}
2094
2095void rt6_purge_dflt_routers(struct net *net)
2096{
2097	struct rt6_info *rt;
2098	struct fib6_table *table;
2099
2100	/* NOTE: Keep consistent with rt6_get_dflt_router */
2101	table = fib6_get_table(net, RT6_TABLE_DFLT);
2102	if (!table)
2103		return;
2104
2105restart:
2106	read_lock_bh(&table->tb6_lock);
2107	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2108		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2109		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2110			dst_hold(&rt->dst);
2111			read_unlock_bh(&table->tb6_lock);
2112			ip6_del_rt(rt);
2113			goto restart;
2114		}
2115	}
2116	read_unlock_bh(&table->tb6_lock);
2117}
2118
2119static void rtmsg_to_fib6_config(struct net *net,
2120				 struct in6_rtmsg *rtmsg,
2121				 struct fib6_config *cfg)
2122{
2123	memset(cfg, 0, sizeof(*cfg));
2124
2125	cfg->fc_table = RT6_TABLE_MAIN;
2126	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2127	cfg->fc_metric = rtmsg->rtmsg_metric;
2128	cfg->fc_expires = rtmsg->rtmsg_info;
2129	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2130	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2131	cfg->fc_flags = rtmsg->rtmsg_flags;
2132
2133	cfg->fc_nlinfo.nl_net = net;
2134
2135	cfg->fc_dst = rtmsg->rtmsg_dst;
2136	cfg->fc_src = rtmsg->rtmsg_src;
2137	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2138}
2139
2140int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2141{
2142	struct fib6_config cfg;
2143	struct in6_rtmsg rtmsg;
2144	int err;
2145
2146	switch (cmd) {
2147	case SIOCADDRT:		/* Add a route */
2148	case SIOCDELRT:		/* Delete a route */
2149		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2150			return -EPERM;
2151		err = copy_from_user(&rtmsg, arg,
2152				     sizeof(struct in6_rtmsg));
2153		if (err)
2154			return -EFAULT;
2155
2156		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2157
2158		rtnl_lock();
2159		switch (cmd) {
2160		case SIOCADDRT:
2161			err = ip6_route_add(&cfg);
2162			break;
2163		case SIOCDELRT:
2164			err = ip6_route_del(&cfg);
2165			break;
2166		default:
2167			err = -EINVAL;
2168		}
2169		rtnl_unlock();
2170
2171		return err;
2172	}
2173
2174	return -EINVAL;
2175}
2176
2177/*
2178 *	Drop the packet on the floor
2179 */
2180
2181static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2182{
2183	int type;
2184	struct dst_entry *dst = skb_dst(skb);
2185	switch (ipstats_mib_noroutes) {
2186	case IPSTATS_MIB_INNOROUTES:
2187		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2188		if (type == IPV6_ADDR_ANY) {
2189			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2190				      IPSTATS_MIB_INADDRERRORS);
2191			break;
2192		}
2193		/* FALLTHROUGH */
2194	case IPSTATS_MIB_OUTNOROUTES:
2195		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2196			      ipstats_mib_noroutes);
2197		break;
2198	}
2199	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2200	kfree_skb(skb);
2201	return 0;
2202}
2203
/* Input handler: drop with ICMPV6_NOROUTE, counted as IPSTATS_MIB_INNOROUTES. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2208
/*
 * Output handler: point skb->dev at the dst's device, then drop with
 * ICMPV6_NOROUTE, counted as IPSTATS_MIB_OUTNOROUTES.  @sk is unused.
 */
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2214
/* Input handler: drop with ICMPV6_ADM_PROHIBITED, counted as IPSTATS_MIB_INNOROUTES. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2219
/*
 * Output handler: point skb->dev at the dst's device, then drop with
 * ICMPV6_ADM_PROHIBITED, counted as IPSTATS_MIB_OUTNOROUTES.  @sk is unused.
 */
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2225
2226/*
2227 *	Allocate a dst for local (unicast / anycast) address.
2228 */
2229
2230struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2231				    const struct in6_addr *addr,
2232				    bool anycast)
2233{
2234	struct net *net = dev_net(idev->dev);
2235	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2236					    DST_NOCOUNT, NULL);
2237	if (!rt)
2238		return ERR_PTR(-ENOMEM);
2239
2240	in6_dev_hold(idev);
2241
2242	rt->dst.flags |= DST_HOST;
2243	rt->dst.input = ip6_input;
2244	rt->dst.output = ip6_output;
2245	rt->rt6i_idev = idev;
2246
2247	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2248	if (anycast)
2249		rt->rt6i_flags |= RTF_ANYCAST;
2250	else
2251		rt->rt6i_flags |= RTF_LOCAL;
2252
2253	rt->rt6i_gateway  = *addr;
2254	rt->rt6i_dst.addr = *addr;
2255	rt->rt6i_dst.plen = 128;
2256	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2257
2258	atomic_set(&rt->dst.__refcnt, 1);
2259
2260	return rt;
2261}
2262
2263int ip6_route_get_saddr(struct net *net,
2264			struct rt6_info *rt,
2265			const struct in6_addr *daddr,
2266			unsigned int prefs,
2267			struct in6_addr *saddr)
2268{
2269	struct inet6_dev *idev =
2270		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2271	int err = 0;
2272	if (rt && rt->rt6i_prefsrc.plen)
2273		*saddr = rt->rt6i_prefsrc.addr;
2274	else
2275		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2276					 daddr, prefs, saddr);
2277	return err;
2278}
2279
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle passed through fib6_clean_all() to fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
2286
2287static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2288{
2289	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2290	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2291	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2292
2293	if (((void *)rt->dst.dev == dev || !dev) &&
2294	    rt != net->ipv6.ip6_null_entry &&
2295	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2296		/* remove prefsrc entry */
2297		rt->rt6i_prefsrc.plen = 0;
2298	}
2299	return 0;
2300}
2301
/*
 * Walk every route in the address's namespace and clear the preferred
 * source on routes of the address's device that use @ifp's address.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2312
/*
 * Flag combinations tested by fib6_clean_tohost(); a route matches a
 * combination only when all of its flags are set.
 */
#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2315
2316/* Remove routers and update dst entries when gateway turn into host. */
2317static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2318{
2319	struct in6_addr *gateway = (struct in6_addr *)arg;
2320
2321	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2322	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2323	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2324		return -1;
2325	}
2326	return 0;
2327}
2328
/*
 * Run fib6_clean_tohost() over every route in @net, with @gateway as the
 * address of the former gateway.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2333
/* Argument bundle passed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is spared */
};
2338
2339static int fib6_ifdown(struct rt6_info *rt, void *arg)
2340{
2341	const struct arg_dev_net *adn = arg;
2342	const struct net_device *dev = adn->dev;
2343
2344	if ((rt->dst.dev == dev || !dev) &&
2345	    rt != adn->net->ipv6.ip6_null_entry)
2346		return -1;
2347
2348	return 0;
2349}
2350
/*
 * Apply fib6_ifdown() for @dev to the routing tables of @net and to the
 * icmp6 dst list.  A NULL @dev matches routes on every device.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}
2361
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	/* device whose MTU changed */
	struct net_device *dev;
	/* the new MTU value */
	unsigned int mtu;
};
2366
2367static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2368{
2369	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2370	struct inet6_dev *idev;
2371
2372	/* In IPv6 pmtu discovery is not optional,
2373	   so that RTAX_MTU lock cannot disable it.
2374	   We still use this lock to block changes
2375	   caused by addrconf/ndisc.
2376	*/
2377
2378	idev = __in6_dev_get(arg->dev);
2379	if (!idev)
2380		return 0;
2381
2382	/* For administrative MTU increase, there is no way to discover
2383	   IPv6 PMTU increase, so PMTU increase should be updated here.
2384	   Since RFC 1981 doesn't include administrative MTU increase
2385	   update PMTU increase is a MUST. (i.e. jumbo frame)
2386	 */
2387	/*
2388	   If new MTU is less than route PMTU, this new MTU will be the
2389	   lowest MTU in the path, update the route PMTU to reflect PMTU
2390	   decreases; if new MTU is greater than route PMTU, and the
2391	   old MTU is the lowest MTU in the path, update the route PMTU
2392	   to reflect the increase. In this case if the other nodes' MTU
2393	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
2394	   PMTU discouvery.
2395	 */
2396	if (rt->dst.dev == arg->dev &&
2397	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2398	    (dst_mtu(&rt->dst) >= arg->mtu ||
2399	     (dst_mtu(&rt->dst) < arg->mtu &&
2400	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2401		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2402	}
2403	return 0;
2404}
2405
2406void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2407{
2408	struct rt6_mtu_change_arg arg = {
2409		.dev = dev,
2410		.mtu = mtu,
2411	};
2412
2413	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2414}
2415
2416static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2417	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2418	[RTA_OIF]               = { .type = NLA_U32 },
2419	[RTA_IIF]		= { .type = NLA_U32 },
2420	[RTA_PRIORITY]          = { .type = NLA_U32 },
2421	[RTA_METRICS]           = { .type = NLA_NESTED },
2422	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2423	[RTA_PREF]              = { .type = NLA_U8 },
2424};
2425
2426static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2427			      struct fib6_config *cfg)
2428{
2429	struct rtmsg *rtm;
2430	struct nlattr *tb[RTA_MAX+1];
2431	unsigned int pref;
2432	int err;
2433
2434	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2435	if (err < 0)
2436		goto errout;
2437
2438	err = -EINVAL;
2439	rtm = nlmsg_data(nlh);
2440	memset(cfg, 0, sizeof(*cfg));
2441
2442	cfg->fc_table = rtm->rtm_table;
2443	cfg->fc_dst_len = rtm->rtm_dst_len;
2444	cfg->fc_src_len = rtm->rtm_src_len;
2445	cfg->fc_flags = RTF_UP;
2446	cfg->fc_protocol = rtm->rtm_protocol;
2447	cfg->fc_type = rtm->rtm_type;
2448
2449	if (rtm->rtm_type == RTN_UNREACHABLE ||
2450	    rtm->rtm_type == RTN_BLACKHOLE ||
2451	    rtm->rtm_type == RTN_PROHIBIT ||
2452	    rtm->rtm_type == RTN_THROW)
2453		cfg->fc_flags |= RTF_REJECT;
2454
2455	if (rtm->rtm_type == RTN_LOCAL)
2456		cfg->fc_flags |= RTF_LOCAL;
2457
2458	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2459	cfg->fc_nlinfo.nlh = nlh;
2460	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2461
2462	if (tb[RTA_GATEWAY]) {
2463		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2464		cfg->fc_flags |= RTF_GATEWAY;
2465	}
2466
2467	if (tb[RTA_DST]) {
2468		int plen = (rtm->rtm_dst_len + 7) >> 3;
2469
2470		if (nla_len(tb[RTA_DST]) < plen)
2471			goto errout;
2472
2473		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2474	}
2475
2476	if (tb[RTA_SRC]) {
2477		int plen = (rtm->rtm_src_len + 7) >> 3;
2478
2479		if (nla_len(tb[RTA_SRC]) < plen)
2480			goto errout;
2481
2482		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2483	}
2484
2485	if (tb[RTA_PREFSRC])
2486		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2487
2488	if (tb[RTA_OIF])
2489		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2490
2491	if (tb[RTA_PRIORITY])
2492		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2493
2494	if (tb[RTA_METRICS]) {
2495		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2496		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2497	}
2498
2499	if (tb[RTA_TABLE])
2500		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2501
2502	if (tb[RTA_MULTIPATH]) {
2503		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2504		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2505	}
2506
2507	if (tb[RTA_PREF]) {
2508		pref = nla_get_u8(tb[RTA_PREF]);
2509		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2510		    pref != ICMPV6_ROUTER_PREF_HIGH)
2511			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2512		cfg->fc_flags |= RTF_PREF(pref);
2513	}
2514
2515	err = 0;
2516errout:
2517	return err;
2518}
2519
2520struct rt6_nh {
2521	struct rt6_info *rt6_info;
2522	struct fib6_config r_cfg;
2523	struct mx6_config mxc;
2524	struct list_head next;
2525};
2526
2527static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2528{
2529	struct rt6_nh *nh;
2530
2531	list_for_each_entry(nh, rt6_nh_list, next) {
2532		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2533		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2534		        nh->r_cfg.fc_ifindex);
2535	}
2536}
2537
2538static int ip6_route_info_append(struct list_head *rt6_nh_list,
2539				 struct rt6_info *rt, struct fib6_config *r_cfg)
2540{
2541	struct rt6_nh *nh;
2542	struct rt6_info *rtnh;
2543	int err = -EEXIST;
2544
2545	list_for_each_entry(nh, rt6_nh_list, next) {
2546		/* check if rt6_info already exists */
2547		rtnh = nh->rt6_info;
2548
2549		if (rtnh->dst.dev == rt->dst.dev &&
2550		    rtnh->rt6i_idev == rt->rt6i_idev &&
2551		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2552				    &rt->rt6i_gateway))
2553			return err;
2554	}
2555
2556	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2557	if (!nh)
2558		return -ENOMEM;
2559	nh->rt6_info = rt;
2560	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2561	if (err) {
2562		kfree(nh);
2563		return err;
2564	}
2565	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2566	list_add_tail(&nh->next, rt6_nh_list);
2567
2568	return 0;
2569}
2570
2571static int ip6_route_multipath_add(struct fib6_config *cfg)
2572{
2573	struct fib6_config r_cfg;
2574	struct rtnexthop *rtnh;
2575	struct rt6_info *rt;
2576	struct rt6_nh *err_nh;
2577	struct rt6_nh *nh, *nh_safe;
2578	int remaining;
2579	int attrlen;
2580	int err = 1;
2581	int nhn = 0;
2582	int replace = (cfg->fc_nlinfo.nlh &&
2583		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2584	LIST_HEAD(rt6_nh_list);
2585
2586	remaining = cfg->fc_mp_len;
2587	rtnh = (struct rtnexthop *)cfg->fc_mp;
2588
2589	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2590	 * rt6_info structs per nexthop
2591	 */
2592	while (rtnh_ok(rtnh, remaining)) {
2593		memcpy(&r_cfg, cfg, sizeof(*cfg));
2594		if (rtnh->rtnh_ifindex)
2595			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2596
2597		attrlen = rtnh_attrlen(rtnh);
2598		if (attrlen > 0) {
2599			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2600
2601			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2602			if (nla) {
2603				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2604				r_cfg.fc_flags |= RTF_GATEWAY;
2605			}
2606		}
2607
2608		err = ip6_route_info_create(&r_cfg, &rt);
2609		if (err)
2610			goto cleanup;
2611
2612		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2613		if (err) {
2614			dst_free(&rt->dst);
2615			goto cleanup;
2616		}
2617
2618		rtnh = rtnh_next(rtnh, &remaining);
2619	}
2620
2621	err_nh = NULL;
2622	list_for_each_entry(nh, &rt6_nh_list, next) {
2623		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2624		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2625		nh->rt6_info = NULL;
2626		if (err) {
2627			if (replace && nhn)
2628				ip6_print_replace_route_err(&rt6_nh_list);
2629			err_nh = nh;
2630			goto add_errout;
2631		}
2632
2633		/* Because each route is added like a single route we remove
2634		 * these flags after the first nexthop: if there is a collision,
2635		 * we have already failed to add the first nexthop:
2636		 * fib6_add_rt2node() has rejected it; when replacing, old
2637		 * nexthops have been replaced by first new, the rest should
2638		 * be added to it.
2639		 */
2640		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2641						     NLM_F_REPLACE);
2642		nhn++;
2643	}
2644
2645	goto cleanup;
2646
2647add_errout:
2648	/* Delete routes that were already added */
2649	list_for_each_entry(nh, &rt6_nh_list, next) {
2650		if (err_nh == nh)
2651			break;
2652		ip6_route_del(&nh->r_cfg);
2653	}
2654
2655cleanup:
2656	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2657		if (nh->rt6_info)
2658			dst_free(&nh->rt6_info->dst);
2659		if (nh->mxc.mx)
2660			kfree(nh->mxc.mx);
2661		list_del(&nh->next);
2662		kfree(nh);
2663	}
2664
2665	return err;
2666}
2667
2668static int ip6_route_multipath_del(struct fib6_config *cfg)
2669{
2670	struct fib6_config r_cfg;
2671	struct rtnexthop *rtnh;
2672	int remaining;
2673	int attrlen;
2674	int err = 1, last_err = 0;
2675
2676	remaining = cfg->fc_mp_len;
2677	rtnh = (struct rtnexthop *)cfg->fc_mp;
2678
2679	/* Parse a Multipath Entry */
2680	while (rtnh_ok(rtnh, remaining)) {
2681		memcpy(&r_cfg, cfg, sizeof(*cfg));
2682		if (rtnh->rtnh_ifindex)
2683			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2684
2685		attrlen = rtnh_attrlen(rtnh);
2686		if (attrlen > 0) {
2687			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2688
2689			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2690			if (nla) {
2691				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2692				r_cfg.fc_flags |= RTF_GATEWAY;
2693			}
2694		}
2695		err = ip6_route_del(&r_cfg);
2696		if (err)
2697			last_err = err;
2698
2699		rtnh = rtnh_next(rtnh, &remaining);
2700	}
2701
2702	return last_err;
2703}
2704
2705static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2706{
2707	struct fib6_config cfg;
2708	int err;
2709
2710	err = rtm_to_fib6_config(skb, nlh, &cfg);
2711	if (err < 0)
2712		return err;
2713
2714	if (cfg.fc_mp)
2715		return ip6_route_multipath_del(&cfg);
2716	else
2717		return ip6_route_del(&cfg);
2718}
2719
2720static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
2721{
2722	struct fib6_config cfg;
2723	int err;
2724
2725	err = rtm_to_fib6_config(skb, nlh, &cfg);
2726	if (err < 0)
2727		return err;
2728
2729	if (cfg.fc_mp)
2730		return ip6_route_multipath_add(&cfg);
2731	else
2732		return ip6_route_add(&cfg);
2733}
2734
2735static inline size_t rt6_nlmsg_size(void)
2736{
2737	return NLMSG_ALIGN(sizeof(struct rtmsg))
2738	       + nla_total_size(16) /* RTA_SRC */
2739	       + nla_total_size(16) /* RTA_DST */
2740	       + nla_total_size(16) /* RTA_GATEWAY */
2741	       + nla_total_size(16) /* RTA_PREFSRC */
2742	       + nla_total_size(4) /* RTA_TABLE */
2743	       + nla_total_size(4) /* RTA_IIF */
2744	       + nla_total_size(4) /* RTA_OIF */
2745	       + nla_total_size(4) /* RTA_PRIORITY */
2746	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2747	       + nla_total_size(sizeof(struct rta_cacheinfo))
2748	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2749	       + nla_total_size(1); /* RTA_PREF */
2750}
2751
2752static int rt6_fill_node(struct net *net,
2753			 struct sk_buff *skb, struct rt6_info *rt,
2754			 struct in6_addr *dst, struct in6_addr *src,
2755			 int iif, int type, u32 portid, u32 seq,
2756			 int prefix, int nowait, unsigned int flags)
2757{
2758	struct rtmsg *rtm;
2759	struct nlmsghdr *nlh;
2760	long expires;
2761	u32 table;
2762
2763	if (prefix) {	/* user wants prefix routes only */
2764		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2765			/* success since this is not a prefix route */
2766			return 1;
2767		}
2768	}
2769
2770	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2771	if (!nlh)
2772		return -EMSGSIZE;
2773
2774	rtm = nlmsg_data(nlh);
2775	rtm->rtm_family = AF_INET6;
2776	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2777	rtm->rtm_src_len = rt->rt6i_src.plen;
2778	rtm->rtm_tos = 0;
2779	if (rt->rt6i_table)
2780		table = rt->rt6i_table->tb6_id;
2781	else
2782		table = RT6_TABLE_UNSPEC;
2783	rtm->rtm_table = table;
2784	if (nla_put_u32(skb, RTA_TABLE, table))
2785		goto nla_put_failure;
2786	if (rt->rt6i_flags & RTF_REJECT) {
2787		switch (rt->dst.error) {
2788		case -EINVAL:
2789			rtm->rtm_type = RTN_BLACKHOLE;
2790			break;
2791		case -EACCES:
2792			rtm->rtm_type = RTN_PROHIBIT;
2793			break;
2794		case -EAGAIN:
2795			rtm->rtm_type = RTN_THROW;
2796			break;
2797		default:
2798			rtm->rtm_type = RTN_UNREACHABLE;
2799			break;
2800		}
2801	}
2802	else if (rt->rt6i_flags & RTF_LOCAL)
2803		rtm->rtm_type = RTN_LOCAL;
2804	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2805		rtm->rtm_type = RTN_LOCAL;
2806	else
2807		rtm->rtm_type = RTN_UNICAST;
2808	rtm->rtm_flags = 0;
2809	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2810	rtm->rtm_protocol = rt->rt6i_protocol;
2811	if (rt->rt6i_flags & RTF_DYNAMIC)
2812		rtm->rtm_protocol = RTPROT_REDIRECT;
2813	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2814		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2815			rtm->rtm_protocol = RTPROT_RA;
2816		else
2817			rtm->rtm_protocol = RTPROT_KERNEL;
2818	}
2819
2820	if (rt->rt6i_flags & RTF_CACHE)
2821		rtm->rtm_flags |= RTM_F_CLONED;
2822
2823	if (dst) {
2824		if (nla_put_in6_addr(skb, RTA_DST, dst))
2825			goto nla_put_failure;
2826		rtm->rtm_dst_len = 128;
2827	} else if (rtm->rtm_dst_len)
2828		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2829			goto nla_put_failure;
2830#ifdef CONFIG_IPV6_SUBTREES
2831	if (src) {
2832		if (nla_put_in6_addr(skb, RTA_SRC, src))
2833			goto nla_put_failure;
2834		rtm->rtm_src_len = 128;
2835	} else if (rtm->rtm_src_len &&
2836		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2837		goto nla_put_failure;
2838#endif
2839	if (iif) {
2840#ifdef CONFIG_IPV6_MROUTE
2841		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2842			int err = ip6mr_get_route(net, skb, rtm, nowait);
2843			if (err <= 0) {
2844				if (!nowait) {
2845					if (err == 0)
2846						return 0;
2847					goto nla_put_failure;
2848				} else {
2849					if (err == -EMSGSIZE)
2850						goto nla_put_failure;
2851				}
2852			}
2853		} else
2854#endif
2855			if (nla_put_u32(skb, RTA_IIF, iif))
2856				goto nla_put_failure;
2857	} else if (dst) {
2858		struct in6_addr saddr_buf;
2859		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2860		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2861			goto nla_put_failure;
2862	}
2863
2864	if (rt->rt6i_prefsrc.plen) {
2865		struct in6_addr saddr_buf;
2866		saddr_buf = rt->rt6i_prefsrc.addr;
2867		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2868			goto nla_put_failure;
2869	}
2870
2871	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2872		goto nla_put_failure;
2873
2874	if (rt->rt6i_flags & RTF_GATEWAY) {
2875		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
2876			goto nla_put_failure;
2877	}
2878
2879	if (rt->dst.dev &&
2880	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2881		goto nla_put_failure;
2882	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2883		goto nla_put_failure;
2884
2885	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2886
2887	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2888		goto nla_put_failure;
2889
2890	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2891		goto nla_put_failure;
2892
2893	nlmsg_end(skb, nlh);
2894	return 0;
2895
2896nla_put_failure:
2897	nlmsg_cancel(skb, nlh);
2898	return -EMSGSIZE;
2899}
2900
2901int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2902{
2903	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2904	int prefix;
2905
2906	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2907		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2908		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2909	} else
2910		prefix = 0;
2911
2912	return rt6_fill_node(arg->net,
2913		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2914		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2915		     prefix, 0, NLM_F_MULTI);
2916}
2917
2918static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2919{
2920	struct net *net = sock_net(in_skb->sk);
2921	struct nlattr *tb[RTA_MAX+1];
2922	struct rt6_info *rt;
2923	struct sk_buff *skb;
2924	struct rtmsg *rtm;
2925	struct flowi6 fl6;
2926	int err, iif = 0, oif = 0;
2927
2928	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2929	if (err < 0)
2930		goto errout;
2931
2932	err = -EINVAL;
2933	memset(&fl6, 0, sizeof(fl6));
2934
2935	if (tb[RTA_SRC]) {
2936		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2937			goto errout;
2938
2939		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2940	}
2941
2942	if (tb[RTA_DST]) {
2943		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2944			goto errout;
2945
2946		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2947	}
2948
2949	if (tb[RTA_IIF])
2950		iif = nla_get_u32(tb[RTA_IIF]);
2951
2952	if (tb[RTA_OIF])
2953		oif = nla_get_u32(tb[RTA_OIF]);
2954
2955	if (tb[RTA_MARK])
2956		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2957
2958	if (iif) {
2959		struct net_device *dev;
2960		int flags = 0;
2961
2962		dev = __dev_get_by_index(net, iif);
2963		if (!dev) {
2964			err = -ENODEV;
2965			goto errout;
2966		}
2967
2968		fl6.flowi6_iif = iif;
2969
2970		if (!ipv6_addr_any(&fl6.saddr))
2971			flags |= RT6_LOOKUP_F_HAS_SADDR;
2972
2973		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2974							       flags);
2975	} else {
2976		fl6.flowi6_oif = oif;
2977
2978		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2979	}
2980
2981	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2982	if (!skb) {
2983		ip6_rt_put(rt);
2984		err = -ENOBUFS;
2985		goto errout;
2986	}
2987
2988	/* Reserve room for dummy headers, this skb can pass
2989	   through good chunk of routing engine.
2990	 */
2991	skb_reset_mac_header(skb);
2992	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2993
2994	skb_dst_set(skb, &rt->dst);
2995
2996	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2997			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2998			    nlh->nlmsg_seq, 0, 0, 0);
2999	if (err < 0) {
3000		kfree_skb(skb);
3001		goto errout;
3002	}
3003
3004	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3005errout:
3006	return err;
3007}
3008
3009void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
3010{
3011	struct sk_buff *skb;
3012	struct net *net = info->nl_net;
3013	u32 seq;
3014	int err;
3015
3016	err = -ENOBUFS;
3017	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3018
3019	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
3020	if (!skb)
3021		goto errout;
3022
3023	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3024				event, info->portid, seq, 0, 0, 0);
3025	if (err < 0) {
3026		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3027		WARN_ON(err == -EMSGSIZE);
3028		kfree_skb(skb);
3029		goto errout;
3030	}
3031	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3032		    info->nlh, gfp_any());
3033	return;
3034errout:
3035	if (err < 0)
3036		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3037}
3038
3039static int ip6_route_dev_notify(struct notifier_block *this,
3040				unsigned long event, void *ptr)
3041{
3042	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3043	struct net *net = dev_net(dev);
3044
3045	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3046		net->ipv6.ip6_null_entry->dst.dev = dev;
3047		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3048#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3049		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3050		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3051		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3052		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3053#endif
3054	}
3055
3056	return NOTIFY_OK;
3057}
3058
3059/*
3060 *	/proc
3061 */
3062
3063#ifdef CONFIG_PROC_FS
3064
3065static const struct file_operations ipv6_route_proc_fops = {
3066	.owner		= THIS_MODULE,
3067	.open		= ipv6_route_open,
3068	.read		= seq_read,
3069	.llseek		= seq_lseek,
3070	.release	= seq_release_net,
3071};
3072
3073static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3074{
3075	struct net *net = (struct net *)seq->private;
3076	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3077		   net->ipv6.rt6_stats->fib_nodes,
3078		   net->ipv6.rt6_stats->fib_route_nodes,
3079		   net->ipv6.rt6_stats->fib_rt_alloc,
3080		   net->ipv6.rt6_stats->fib_rt_entries,
3081		   net->ipv6.rt6_stats->fib_rt_cache,
3082		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3083		   net->ipv6.rt6_stats->fib_discarded_routes);
3084
3085	return 0;
3086}
3087
3088static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3089{
3090	return single_open_net(inode, file, rt6_stats_seq_show);
3091}
3092
3093static const struct file_operations rt6_stats_seq_fops = {
3094	.owner	 = THIS_MODULE,
3095	.open	 = rt6_stats_seq_open,
3096	.read	 = seq_read,
3097	.llseek	 = seq_lseek,
3098	.release = single_release_net,
3099};
3100#endif	/* CONFIG_PROC_FS */
3101
3102#ifdef CONFIG_SYSCTL
3103
3104static
3105int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3106			      void __user *buffer, size_t *lenp, loff_t *ppos)
3107{
3108	struct net *net;
3109	int delay;
3110	if (!write)
3111		return -EINVAL;
3112
3113	net = (struct net *)ctl->extra1;
3114	delay = net->ipv6.sysctl.flush_delay;
3115	proc_dointvec(ctl, write, buffer, lenp, ppos);
3116	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3117	return 0;
3118}
3119
3120struct ctl_table ipv6_route_table_template[] = {
3121	{
3122		.procname	=	"flush",
3123		.data		=	&init_net.ipv6.sysctl.flush_delay,
3124		.maxlen		=	sizeof(int),
3125		.mode		=	0200,
3126		.proc_handler	=	ipv6_sysctl_rtcache_flush
3127	},
3128	{
3129		.procname	=	"gc_thresh",
3130		.data		=	&ip6_dst_ops_template.gc_thresh,
3131		.maxlen		=	sizeof(int),
3132		.mode		=	0644,
3133		.proc_handler	=	proc_dointvec,
3134	},
3135	{
3136		.procname	=	"max_size",
3137		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3138		.maxlen		=	sizeof(int),
3139		.mode		=	0644,
3140		.proc_handler	=	proc_dointvec,
3141	},
3142	{
3143		.procname	=	"gc_min_interval",
3144		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3145		.maxlen		=	sizeof(int),
3146		.mode		=	0644,
3147		.proc_handler	=	proc_dointvec_jiffies,
3148	},
3149	{
3150		.procname	=	"gc_timeout",
3151		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3152		.maxlen		=	sizeof(int),
3153		.mode		=	0644,
3154		.proc_handler	=	proc_dointvec_jiffies,
3155	},
3156	{
3157		.procname	=	"gc_interval",
3158		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3159		.maxlen		=	sizeof(int),
3160		.mode		=	0644,
3161		.proc_handler	=	proc_dointvec_jiffies,
3162	},
3163	{
3164		.procname	=	"gc_elasticity",
3165		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3166		.maxlen		=	sizeof(int),
3167		.mode		=	0644,
3168		.proc_handler	=	proc_dointvec,
3169	},
3170	{
3171		.procname	=	"mtu_expires",
3172		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3173		.maxlen		=	sizeof(int),
3174		.mode		=	0644,
3175		.proc_handler	=	proc_dointvec_jiffies,
3176	},
3177	{
3178		.procname	=	"min_adv_mss",
3179		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3180		.maxlen		=	sizeof(int),
3181		.mode		=	0644,
3182		.proc_handler	=	proc_dointvec,
3183	},
3184	{
3185		.procname	=	"gc_min_interval_ms",
3186		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3187		.maxlen		=	sizeof(int),
3188		.mode		=	0644,
3189		.proc_handler	=	proc_dointvec_ms_jiffies,
3190	},
3191	{ }
3192};
3193
3194struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3195{
3196	struct ctl_table *table;
3197
3198	table = kmemdup(ipv6_route_table_template,
3199			sizeof(ipv6_route_table_template),
3200			GFP_KERNEL);
3201
3202	if (table) {
3203		table[0].data = &net->ipv6.sysctl.flush_delay;
3204		table[0].extra1 = net;
3205		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3206		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3207		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3208		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3209		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3210		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3211		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3212		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3213		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3214
3215		/* Don't export sysctls to unprivileged users */
3216		if (net->user_ns != &init_user_ns)
3217			table[0].procname = NULL;
3218	}
3219
3220	return table;
3221}
3222#endif
3223
3224static int __net_init ip6_route_net_init(struct net *net)
3225{
3226	int ret = -ENOMEM;
3227
3228	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3229	       sizeof(net->ipv6.ip6_dst_ops));
3230
3231	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3232		goto out_ip6_dst_ops;
3233
3234	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3235					   sizeof(*net->ipv6.ip6_null_entry),
3236					   GFP_KERNEL);
3237	if (!net->ipv6.ip6_null_entry)
3238		goto out_ip6_dst_entries;
3239	net->ipv6.ip6_null_entry->dst.path =
3240		(struct dst_entry *)net->ipv6.ip6_null_entry;
3241	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3242	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3243			 ip6_template_metrics, true);
3244
3245#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3246	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3247					       sizeof(*net->ipv6.ip6_prohibit_entry),
3248					       GFP_KERNEL);
3249	if (!net->ipv6.ip6_prohibit_entry)
3250		goto out_ip6_null_entry;
3251	net->ipv6.ip6_prohibit_entry->dst.path =
3252		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3253	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3254	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3255			 ip6_template_metrics, true);
3256
3257	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3258					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3259					       GFP_KERNEL);
3260	if (!net->ipv6.ip6_blk_hole_entry)
3261		goto out_ip6_prohibit_entry;
3262	net->ipv6.ip6_blk_hole_entry->dst.path =
3263		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3264	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3265	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3266			 ip6_template_metrics, true);
3267#endif
3268
3269	net->ipv6.sysctl.flush_delay = 0;
3270	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3271	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3272	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3273	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3274	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3275	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3276	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3277
3278	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3279
3280	ret = 0;
3281out:
3282	return ret;
3283
3284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3285out_ip6_prohibit_entry:
3286	kfree(net->ipv6.ip6_prohibit_entry);
3287out_ip6_null_entry:
3288	kfree(net->ipv6.ip6_null_entry);
3289#endif
3290out_ip6_dst_entries:
3291	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3292out_ip6_dst_ops:
3293	goto out;
3294}
3295
3296static void __net_exit ip6_route_net_exit(struct net *net)
3297{
3298	kfree(net->ipv6.ip6_null_entry);
3299#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3300	kfree(net->ipv6.ip6_prohibit_entry);
3301	kfree(net->ipv6.ip6_blk_hole_entry);
3302#endif
3303	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3304}
3305
3306static int __net_init ip6_route_net_init_late(struct net *net)
3307{
3308#ifdef CONFIG_PROC_FS
3309	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3310	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3311#endif
3312	return 0;
3313}
3314
3315static void __net_exit ip6_route_net_exit_late(struct net *net)
3316{
3317#ifdef CONFIG_PROC_FS
3318	remove_proc_entry("ipv6_route", net->proc_net);
3319	remove_proc_entry("rt6_stats", net->proc_net);
3320#endif
3321}
3322
3323static struct pernet_operations ip6_route_net_ops = {
3324	.init = ip6_route_net_init,
3325	.exit = ip6_route_net_exit,
3326};
3327
3328static int __net_init ipv6_inetpeer_init(struct net *net)
3329{
3330	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3331
3332	if (!bp)
3333		return -ENOMEM;
3334	inet_peer_base_init(bp);
3335	net->ipv6.peers = bp;
3336	return 0;
3337}
3338
3339static void __net_exit ipv6_inetpeer_exit(struct net *net)
3340{
3341	struct inet_peer_base *bp = net->ipv6.peers;
3342
3343	net->ipv6.peers = NULL;
3344	inetpeer_invalidate_tree(bp);
3345	kfree(bp);
3346}
3347
3348static struct pernet_operations ipv6_inetpeer_ops = {
3349	.init	=	ipv6_inetpeer_init,
3350	.exit	=	ipv6_inetpeer_exit,
3351};
3352
3353static struct pernet_operations ip6_route_net_late_ops = {
3354	.init = ip6_route_net_init_late,
3355	.exit = ip6_route_net_exit_late,
3356};
3357
3358static struct notifier_block ip6_route_dev_notifier = {
3359	.notifier_call = ip6_route_dev_notify,
3360	.priority = 0,
3361};
3362
3363int __init ip6_route_init(void)
3364{
3365	int ret;
3366
3367	ret = -ENOMEM;
3368	ip6_dst_ops_template.kmem_cachep =
3369		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3370				  SLAB_HWCACHE_ALIGN, NULL);
3371	if (!ip6_dst_ops_template.kmem_cachep)
3372		goto out;
3373
3374	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3375	if (ret)
3376		goto out_kmem_cache;
3377
3378	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3379	if (ret)
3380		goto out_dst_entries;
3381
3382	ret = register_pernet_subsys(&ip6_route_net_ops);
3383	if (ret)
3384		goto out_register_inetpeer;
3385
3386	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3387
3388	/* Registering of the loopback is done before this portion of code,
3389	 * the loopback reference in rt6_info will not be taken, do it
3390	 * manually for init_net */
3391	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3392	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3393  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3395	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3396	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3397	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3398  #endif
3399	ret = fib6_init();
3400	if (ret)
3401		goto out_register_subsys;
3402
3403	ret = xfrm6_init();
3404	if (ret)
3405		goto out_fib6_init;
3406
3407	ret = fib6_rules_init();
3408	if (ret)
3409		goto xfrm6_init;
3410
3411	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3412	if (ret)
3413		goto fib6_rules_init;
3414
3415	ret = -ENOBUFS;
3416	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3417	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3418	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3419		goto out_register_late_subsys;
3420
3421	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3422	if (ret)
3423		goto out_register_late_subsys;
3424
3425out:
3426	return ret;
3427
3428out_register_late_subsys:
3429	unregister_pernet_subsys(&ip6_route_net_late_ops);
3430fib6_rules_init:
3431	fib6_rules_cleanup();
3432xfrm6_init:
3433	xfrm6_fini();
3434out_fib6_init:
3435	fib6_gc_cleanup();
3436out_register_subsys:
3437	unregister_pernet_subsys(&ip6_route_net_ops);
3438out_register_inetpeer:
3439	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3440out_dst_entries:
3441	dst_entries_destroy(&ip6_dst_blackhole_ops);
3442out_kmem_cache:
3443	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3444	goto out;
3445}
3446
3447void ip6_route_cleanup(void)
3448{
3449	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3450	unregister_pernet_subsys(&ip6_route_net_late_ops);
3451	fib6_rules_cleanup();
3452	xfrm6_fini();
3453	fib6_gc_cleanup();
3454	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3455	unregister_pernet_subsys(&ip6_route_net_ops);
3456	dst_entries_destroy(&ip6_dst_blackhole_ops);
3457	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3458}
3459