/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl),
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), no locking is needed. We force an exit if this counter
   reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least in my neighbourhood)
     return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. That is difficult or even impossible, especially
   taking fragmentation into account. In short, ttl is not a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there is a router in the loop
   which does not force DF, even when the encapsulating packets have DF set.
   But that is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
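
/* A minimal sketch of the recursion guard described in problem 1 above.
 * The real counter lives in the core stack (net/core/dev.c), not in this
 * driver; names and exact details here are illustrative only:
 *
 *	static DEFINE_PER_CPU(int, xmit_recursion);
 *	#define RECURSION_LIMIT 10
 *
 *	if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
 *		goto drop;			(dead loop detected)
 *	__this_cpu_inc(xmit_recursion);
 *	rc = dev_hard_start_xmit(skb, dev, txq);
 *	__this_cpu_dec(xmit_recursion);
 */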

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{
	/* All routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. This makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee;
	   why the hell do these idiots break standards established
	   by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return PACKET_RCVD;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return PACKET_RCVD;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return PACKET_RCVD;
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return PACKET_REJECT;

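	/* Errors for tunnels with no fixed remote (an unset or multicast
	 * destination) cannot be attributed to a single peer, and a
	 * ttl-inheriting tunnel (ttl == 0) sees TIME_EXCEEDED because of
	 * the inner packet's own ttl, so ignore those cases.
	 */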
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return PACKET_RCVD;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return PACKET_RCVD;

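	/* Track the error burst: bump the count while errors arrive within
	 * IPTUNNEL_ERR_TIMEO of each other, otherwise start a new burst.
	 */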
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		skb_pop_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;
}

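/* For reference, gre_build_header() below pushes the following on the wire
 * (RFC 2784/2890); each optional word is present only when the matching
 * TUNNEL_* flag is set in tpi.flags:
 *
 *	__be16	flags;		C|K|S bits and version
 *	__be16	protocol;	e.g. htons(ETH_P_TEB) for gretap
 *	__be32	checksum;	if TUNNEL_CSUM (16-bit csum + reserved)
 *	__be32	key;		if TUNNEL_KEY
 *	__be32	seq;		if TUNNEL_SEQ
 */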
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct tnl_ptk_info tpi;

	tpi.flags = tunnel->parms.o_flags;
	tpi.proto = proto;
	tpi.key = tunnel->parms.o_key;
	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;
	tpi.seq = htonl(tunnel->o_seqno);

	/* Push GRE header. */
	gre_build_header(skb, &tpi, tunnel->tun_hlen);

	skb_set_inner_protocol(skb, tpi.proto);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);

	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));

	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

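	/* Worked example (illustrative): with no GRE options, tun_hlen is 4
	 * and there is no encap header, so t_hlen = 4 + 20 = 24 and the MTU
	 * below becomes 1500 - 24 - 4 = 1472; csum, key and seq each add
	 * another 4 bytes of header.
	 */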
	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported. */
		dev->features    |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		/* Can use a lockless transmit, unless we generate
		 * output sequences.
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static struct gre_cisco_protocol ipgre_protocol = {
	.handler        = ipgre_rcv,
	.err_handler    = ipgre_err,
	.priority       = 0,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
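
/* For example (illustrative, standard iproute2 syntax), a command such as:
 *
 *	ip link add gre1 type gre remote 10.0.0.2 local 10.0.0.1 ttl 64 key 42
 *
 * arrives here as IFLA_GRE_REMOTE, IFLA_GRE_LOCAL and IFLA_GRE_TTL
 * attributes, plus IFLA_GRE_IKEY/IFLA_GRE_OKEY and the GRE_KEY bit set in
 * both flag attributes.
 */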

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}
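
/* For example (illustrative), these attributes come from the iproute2
 * encapsulation options, e.g.:
 *
 *	ip link add gre1 type gre remote 10.0.0.2 local 10.0.0.1 \
 *		encap fou encap-sport auto encap-dport 7777
 *
 * which selects a UDP (foo-over-udp) encapsulation for the tunnel.
 */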

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops		= &gre_tap_netdev_ops;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(data, tb, &p);
	return ip_tunnel_newlink(dev, tb, &p);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(data, tb, &p);
	return ip_tunnel_changelink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = gre_cisco_register(&ipgre_protocol);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	return 0;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_cisco_unregister(&ipgre_protocol);
add_proto_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	gre_cisco_unregister(&ipgre_protocol);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");