/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with the rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     That would deadlock whenever the backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever should be done under neigh->lock;
   the most complicated procedure we allow there is dev->hard_header.
   dev->hard_header is assumed to be simplistic and to make no
   callbacks into neighbour tables.
 */

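/* Last-resort output function: installed on dead or freshly allocated
 * entries so that any packet handed to them is dropped rather than sent
 * through a half-initialized neighbour.
 */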
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * Returns a value drawn uniformly at random from the interval
 * (1/2)*base ... (3/2)*base.  This matches the default IPv6 behaviour
 * and is not overridable, because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);

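/* Synchronous shrinker, invoked from neigh_alloc() when the table is over
 * its thresholds: walk every hash bucket under tbl->lock and unlink all
 * entries that are unreferenced and not permanent.  Returns 1 if at least
 * one entry was freed.
 */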
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* A neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation:
				   we must destroy the neighbour entry,
				   but someone still uses it.

				   Destruction is delayed until the
				   last user releases the entry, but
				   we must kill the timers etc. and
				   move it to a safe state right now.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

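/* Allocate and minimally initialize a new neighbour entry.  If the table
 * has grown past gc_thresh2/gc_thresh3 we first try a forced GC pass; the
 * allocation is refused only if that frees nothing and the entry count is
 * still at gc_thresh3 or above.  The entry is created with n->dead = 1 and
 * only becomes live once __neigh_create() links it into the hash table.
 */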
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

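/* Rehash the table into 2^new_shift buckets.  Called with tbl->lock held
 * for writing; concurrent RCU readers keep using the old table until it
 * is freed via call_rcu(), so lookups never observe a half-built table.
 */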
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);

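/* Hash for the proxy-neighbour table: fold the last four bytes of the
 * protocol key down to PNEIGH_HASHMASK + 1 buckets.
 */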
static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable the fast path.

   Called with neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable the fast path.

   Called with neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

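/* Periodic garbage collection, run from a deferrable workqueue.  It
 * re-randomizes reachable_time every 300 seconds, skips the scan entirely
 * while the table is below gc_thresh1, and otherwise drops, one bucket at
 * a time, unreferenced entries that are NUD_FAILED or have gone unused
 * for GC_STALETIME.  The table lock is dropped between buckets so the
 * table stays responsive during the scan.
 */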
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from the random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release the lock here, even if the hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

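/* Total probe budget before an entry is declared failed: unicast plus
 * application probes, plus multicast probes (or multicast re-probes once
 * the entry has reached NUD_PROBE).
 */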
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate spot: the error_report() callback is a
	   complicated routine and may, in particular, hit this same
	   neighbour entry!

	   So we drop the lock around it and re-check the state, to
	   avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

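/* Send one solicitation for the entry.  The tail skb of the arp_queue is
 * copied so the solicit callback has a packet to work with even if the
 * queue is purged meanwhile; the caller must hold neigh->lock, which is
 * released here before calling into the protocol backend.
 */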
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

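/* Slow path of neigh_event_send(): start resolution for the entry if
 * needed and decide the fate of @skb.  Returns 0 if the caller may
 * transmit immediately, 1 if the packet was queued on the arp_queue or
 * had to be dropped because resolution already failed.
 */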
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

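/* Propagate a changed hardware address into the cached hardware header,
 * using the device's header_ops->cache_update callback under hh_lock.
 */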
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache *, const struct net_device *,
		       const unsigned char *) = NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
				a router.

   The caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare the new lladdr with the cached one */
	if (!dev->addr_len) {
		/* First case: the device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED)))
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid a dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

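/* Timer handler for the proxy queue: re-run proxy_redo() on every skb
 * whose scheduled time has passed, and re-arm the timer for the earliest
 * packet that is still waiting.
 */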
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

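/* One-time initialization of a protocol's neighbour table: set up the
 * default parms, statistics, hash tables, periodic GC work and the proxy
 * timer, then publish the table in neigh_tables[index].
 */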
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN);
	neigh_release(neigh);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
	neigh_release(neigh);

out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME)) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME)) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME)))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise the change
				 * only becomes effective the next time neigh_periodic_work
				 * decides to recompute it (which can be multiple minutes away)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}
2060
2061	err = -ENOENT;
2062	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2063	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2064	    !net_eq(net, &init_net))
2065		goto errout_tbl_lock;
2066
2067	if (tb[NDTA_THRESH1])
2068		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2069
2070	if (tb[NDTA_THRESH2])
2071		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2072
2073	if (tb[NDTA_THRESH3])
2074		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2075
2076	if (tb[NDTA_GC_INTERVAL])
2077		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2078
2079	err = 0;
2080
2081errout_tbl_lock:
2082	write_unlock_bh(&tbl->lock);
2083errout:
2084	return err;
2085}
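
/*
 * Userspace sketch (not part of this file) of driving the handler above:
 * raise gc_thresh3 on the IPv4 ARP table ("arp_cache") over a NETLINK_ROUTE
 * socket.  A minimal illustration assuming a CAP_NET_ADMIN process in
 * init_net; real code should check every return value and read back the ACK
 * rather than fire and forget.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/rtnetlink.h>
 *	#include <linux/neighbour.h>
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct ndtmsg ndtm;
 *		char attrs[64];
 *	} req = {
 *		.nlh.nlmsg_type	  = RTM_SETNEIGHTBL,
 *		.nlh.nlmsg_flags  = NLM_F_REQUEST | NLM_F_ACK,
 *		.ndtm.ndtm_family = AF_INET,
 *	};
 *	struct nlattr *nla = (struct nlattr *)req.attrs;
 *	__u32 thresh = 4096;
 *
 *	nla->nla_type = NDTA_NAME;
 *	nla->nla_len  = NLA_HDRLEN + sizeof("arp_cache");
 *	memcpy((char *)nla + NLA_HDRLEN, "arp_cache", sizeof("arp_cache"));
 *	nla = (struct nlattr *)((char *)nla + NLA_ALIGN(nla->nla_len));
 *	nla->nla_type = NDTA_THRESH3;
 *	nla->nla_len  = NLA_HDRLEN + sizeof(thresh);
 *	memcpy((char *)nla + NLA_HDRLEN, &thresh, sizeof(thresh));
 *	req.nlh.nlmsg_len = (char *)nla + NLA_ALIGN(nla->nla_len) -
 *			    (char *)&req;
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	send(fd, &req, req.nlh.nlmsg_len, 0);
 */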

/* Dump all tables and their per-device parms; cb->args[0] and cb->args[1]
 * record the table and parms position to resume a multi-part dump from.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
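
/*
 * For reference, the RTM_NEWNEIGH message built above is laid out as
 * follows on the wire (NDA_LLADDR is only present for NUD_VALID entries):
 *
 *	struct nlmsghdr
 *	struct ndmsg		family, ifindex, state, flags, type
 *	NDA_DST			tbl->key_len bytes (4 for IPv4, 16 for IPv6)
 *	NDA_LLADDR		dev->addr_len bytes (6 for Ethernet)
 *	NDA_PROBES		u32 probe count
 *	NDA_CACHEINFO		struct nda_cacheinfo (ages in clock_t ticks)
 */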

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (pneigh_net(n) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH,
					     NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* Check whether a full ndmsg structure is present; its family
	 * member sits at the same offset as rtgenmsg's, so reading the
	 * family above is safe either way.
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
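
/*
 * A userspace dump request selects the proxy table by sending a full
 * struct ndmsg with ndm_flags == NTF_PROXY instead of the bare rtgenmsg.
 * Sketch, under the same assumptions as the RTM_SETNEIGHTBL example above:
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct ndmsg ndm;
 *	} req = {
 *		.nlh.nlmsg_len	 = NLMSG_LENGTH(sizeof(struct ndmsg)),
 *		.nlh.nlmsg_type	 = RTM_GETNEIGH,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *		.ndm.ndm_family	 = AF_INET,
 *		.ndm.ndm_flags	 = NTF_PROXY,
 *	};
 */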

void neigh_for_each(struct neigh_table *tbl,
		    void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
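
/*
 * Hypothetical caller of neigh_for_each(): the callback runs under
 * rcu_read_lock_bh() and the table read lock, so it must not sleep and
 * must not try to take tbl->lock itself.
 *
 *	static void count_reachable(struct neighbour *n, void *cookie)
 *	{
 *		int *count = cookie;
 *
 *		if (n->nud_state & NUD_REACHABLE)
 *			(*count)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, count_reachable, &count);
 */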

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else {
				np = &n->next;
			}
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
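
/*
 * Sketch of a hypothetical caller of __neigh_for_each_release(): the
 * callback decides, under n->lock, whether an entry should be unlinked
 * and released (nonzero return) or kept (zero).
 *
 *	static int release_failed(struct neighbour *n)
 *	{
 *		return (n->nud_state & NUD_FAILED) != 0;
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, release_failed);
 *	write_unlock_bh(&tbl->lock);
 */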

int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh))
			goto out_kfree_skb;
		err = neigh->output(neigh, skb);
	} else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
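
/*
 * Hypothetical neigh_xmit() caller: resolve (or create) the IPv4 neighbour
 * for a next hop and transmit through it.  NEIGH_LINK_TABLE instead skips
 * resolution and only builds the link-layer header from the given address.
 *
 *	__be32 gw = htonl(0xc0a80001);		(192.168.0.1, illustration)
 *	int err = neigh_xmit(NEIGH_ARP_TABLE, dev, &gw, skb);
 */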

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos,
		      struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
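
/*
 * Sketch of how a protocol wires these helpers into its own /proc
 * seq_file; the foo_* names are hypothetical (net/ipv4/arp.c follows
 * this pattern for /proc/net/arp):
 *
 *	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 *	static const struct seq_operations foo_seq_ops = {
 *		.start	= foo_seq_start,
 *		.next	= neigh_seq_next,
 *		.stop	= neigh_seq_stop,
 *		.show	= foo_seq_show,
 *	};
 */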

/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;

		sf->private = PDE_DATA(inode);
	}
	return ret;
}

static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}

static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int zero;
static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
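
/*
 * Worked example for the conversion above: unres_qlen is expressed in
 * packets, but the backing variable is QUEUE_LEN_BYTES.  With
 * ETH_FRAME_LEN == 1514, SKB_TRUESIZE() adds the sk_buff and
 * skb_shared_info overhead, giving roughly 2k per frame, so writing
 * unres_qlen = 101 stores about 101 * 2k bytes.  Reads divide by the
 * same factor and round down, so a value set via unres_qlen_bytes may
 * read back slightly smaller here.
 */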

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = &zero;
	tmp.extra2 = &int_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise the change only
		 * takes effect the next time neigh_periodic_work recomputes
		 * it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
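
/*
 * For reference, NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit")
 * expands (via NEIGH_SYSCTL_ENTRY) to roughly:
 *
 *	[NEIGH_VAR_APP_PROBES] = {
 *		.procname	= "app_solicit",
 *		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_APP_PROBES),
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= neigh_proc_dointvec_zero_intmax,
 *	}
 *
 * .data starts out as a bare offset into struct neigh_parms (computed from
 * a NULL base) and is rebased onto the actual parms block in
 * neigh_sysctl_register() below.
 */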

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;

		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* These handlers update p->reachable_time when
		 * base_reachable_time(_ms) is written, so the new interval
		 * takes effect from the next neighbour update instead of
		 * waiting (possibly several minutes) for neigh_periodic_work
		 * to recompute it.  Any handler that replaces them should do
		 * the same.
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
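
/*
 * Sketch of typical usage: protocols register one sysctl table per device
 * (and one for the defaults) and tear it down symmetrically.  Passing a
 * NULL handler installs neigh_proc_base_reachable_time for the
 * base_reachable_time(_ms) entries as described above; a protocol may pass
 * its own handler instead (IPv6 ND does) to refresh protocol state on
 * writes.
 *
 *	if (neigh_sysctl_register(dev, p, NULL))
 *		return -ENOBUFS;
 *
 * and on teardown:
 *
 *	neigh_sysctl_unregister(p);
 */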

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;

		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);