1/* IP Virtual Server
2 * data structure and functionality definitions
3 */
4
5#ifndef _NET_IP_VS_H
6#define _NET_IP_VS_H
7
8#include <linux/ip_vs.h>                /* definitions shared with userland */
9
10#include <asm/types.h>                  /* for __uXX types */
11
12#include <linux/list.h>                 /* for struct list_head */
13#include <linux/spinlock.h>             /* for struct rwlock_t */
14#include <linux/atomic.h>               /* for struct atomic_t */
15#include <linux/compiler.h>
16#include <linux/timer.h>
17#include <linux/bug.h>
18
19#include <net/checksum.h>
20#include <linux/netfilter.h>		/* for union nf_inet_addr */
21#include <linux/ip.h>
22#include <linux/ipv6.h>			/* for struct ipv6hdr */
23#include <net/ipv6.h>
24#if IS_ENABLED(CONFIG_IP_VS_IPV6)
25#include <linux/netfilter_ipv6/ip6_tables.h>
26#endif
27#if IS_ENABLED(CONFIG_NF_CONNTRACK)
28#include <net/netfilter/nf_conntrack.h>
29#endif
30#include <net/net_namespace.h>		/* Netw namespace */
31
32/* Generic access of ipvs struct */
33static inline struct netns_ipvs *net_ipvs(struct net* net)
34{
35	return net->ipvs;
36}
37
38/* Get net ptr from skb in traffic cases
39 * use skb_sknet when call is from userland (ioctl or netlink)
40 */
41static inline struct net *skb_net(const struct sk_buff *skb)
42{
43#ifdef CONFIG_NET_NS
44#ifdef CONFIG_IP_VS_DEBUG
45	/*
46	 * This is used for debug only.
47	 * Start with the most likely hit
48	 * End with BUG
49	 */
50	if (likely(skb->dev && dev_net(skb->dev)))
51		return dev_net(skb->dev);
52	if (skb_dst(skb) && skb_dst(skb)->dev)
53		return dev_net(skb_dst(skb)->dev);
54	WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
55		      __func__, __LINE__);
56	if (likely(skb->sk && sock_net(skb->sk)))
57		return sock_net(skb->sk);
58	pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
59		__func__, __LINE__);
60	BUG();
61#else
62	return dev_net(skb->dev ? : skb_dst(skb)->dev);
63#endif
64#else
65	return &init_net;
66#endif
67}
68
69static inline struct net *skb_sknet(const struct sk_buff *skb)
70{
71#ifdef CONFIG_NET_NS
72#ifdef CONFIG_IP_VS_DEBUG
73	/* Start with the most likely hit */
74	if (likely(skb->sk && sock_net(skb->sk)))
75		return sock_net(skb->sk);
76	WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
77		       __func__, __LINE__);
78	if (likely(skb->dev && dev_net(skb->dev)))
79		return dev_net(skb->dev);
80	pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
81		__func__, __LINE__);
82	BUG();
83#else
84	return sock_net(skb->sk);
85#endif
86#else
87	return &init_net;
88#endif
89}
90
91/* This one needed for single_open_net since net is stored directly in
92 * private not as a struct i.e. seq_file_net can't be used.
93 */
94static inline struct net *seq_file_single_net(struct seq_file *seq)
95{
96#ifdef CONFIG_NET_NS
97	return (struct net *)seq->private;
98#else
99	return &init_net;
100#endif
101}
102
103/* Connections' size value needed by ip_vs_ctl.c */
104extern int ip_vs_conn_tab_size;
105
/* Address-family independent summary of a packet's IP header, as filled in
 * by ip_vs_fill_ip4hdr() and ip_vs_fill_iph_skb().
 */
struct ip_vs_iphdr {
	__u32 len;	/* IPv4 simply where L4 starts
			 * IPv6 where L4 Transport Header starts */
	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
	__s16 protocol;	/* IPv4: iph->protocol, IPv6: result of ipv6_find_hdr() */
	__s32 flags;	/* IPv6: flags reported by ipv6_find_hdr() */
	union nf_inet_addr saddr;	/* source address */
	union nf_inet_addr daddr;	/* destination address */
};
115
/* Thin wrapper around skb_header_pointer(); @ipvsh is accepted but unused. */
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
				      int len, void *buffer,
				      const struct ip_vs_iphdr *ipvsh)
{
	void *hdr = skb_header_pointer(skb, offset, len, buffer);

	return hdr;
}
122
123static inline void
124ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
125{
126	const struct iphdr *iph = nh;
127
128	iphdr->len	= iph->ihl * 4;
129	iphdr->fragoffs	= 0;
130	iphdr->protocol	= iph->protocol;
131	iphdr->saddr.ip	= iph->saddr;
132	iphdr->daddr.ip	= iph->daddr;
133}
134
135/* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6.
136 * IPv6 requires some extra work, as finding proper header position,
137 * depend on the IPv6 extension headers.
138 */
139static inline void
140ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
141{
142#ifdef CONFIG_IP_VS_IPV6
143	if (af == AF_INET6) {
144		const struct ipv6hdr *iph =
145			(struct ipv6hdr *)skb_network_header(skb);
146		iphdr->saddr.in6 = iph->saddr;
147		iphdr->daddr.in6 = iph->daddr;
148		/* ipv6_find_hdr() updates len, flags */
149		iphdr->len	 = 0;
150		iphdr->flags	 = 0;
151		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
152						 &iphdr->fragoffs,
153						 &iphdr->flags);
154	} else
155#endif
156	{
157		const struct iphdr *iph =
158			(struct iphdr *)skb_network_header(skb);
159		iphdr->len	= iph->ihl * 4;
160		iphdr->fragoffs	= 0;
161		iphdr->protocol	= iph->protocol;
162		iphdr->saddr.ip	= iph->saddr;
163		iphdr->daddr.ip	= iph->daddr;
164	}
165}
166
167static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
168				   const union nf_inet_addr *src)
169{
170#ifdef CONFIG_IP_VS_IPV6
171	if (af == AF_INET6)
172		dst->in6 = src->in6;
173	else
174#endif
175	dst->ip = src->ip;
176}
177
178static inline void ip_vs_addr_set(int af, union nf_inet_addr *dst,
179				  const union nf_inet_addr *src)
180{
181#ifdef CONFIG_IP_VS_IPV6
182	if (af == AF_INET6) {
183		dst->in6 = src->in6;
184		return;
185	}
186#endif
187	dst->ip = src->ip;
188	dst->all[1] = 0;
189	dst->all[2] = 0;
190	dst->all[3] = 0;
191}
192
193static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
194				   const union nf_inet_addr *b)
195{
196#ifdef CONFIG_IP_VS_IPV6
197	if (af == AF_INET6)
198		return ipv6_addr_equal(&a->in6, &b->in6);
199#endif
200	return a->ip == b->ip;
201}
202
203#ifdef CONFIG_IP_VS_DEBUG
204#include <linux/net.h>
205
206int ip_vs_get_debug_level(void);
207
208static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
209					 const union nf_inet_addr *addr,
210					 int *idx)
211{
212	int len;
213#ifdef CONFIG_IP_VS_IPV6
214	if (af == AF_INET6)
215		len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]",
216			       &addr->in6) + 1;
217	else
218#endif
219		len = snprintf(&buf[*idx], buf_len - *idx, "%pI4",
220			       &addr->ip) + 1;
221
222	*idx += len;
223	BUG_ON(*idx > buf_len + 1);
224	return &buf[*idx - len];
225}
226
/*
 * Debug output helpers (CONFIG_IP_VS_DEBUG builds).
 *
 * @level and @pp are parenthesised in every expansion so that arbitrary
 * expressions (e.g. "a ? b : c" or "&proto") can be passed safely.
 */

/* Print @msg at KERN_DEBUG when @level <= the current debug level.  Provides
 * the scratch buffer that IP_VS_DBG_ADDR() formats addresses into.
 */
#define IP_VS_DBG_BUF(level, msg, ...)					\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Unconditional pr_err() with the IP_VS_DBG_ADDR() scratch buffer */
#define IP_VS_ERR_BUF(msg...)						\
	do {								\
		char ip_vs_dbg_buf[160];				\
		int ip_vs_dbg_idx = 0;					\
		pr_err(msg);						\
	} while (0)

/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
#define IP_VS_DBG_ADDR(af, addr)					\
	ip_vs_dbg_addr(af, ip_vs_dbg_buf,				\
		       sizeof(ip_vs_dbg_buf), addr,			\
		       &ip_vs_dbg_idx)

/* Print @msg at KERN_DEBUG when @level <= the current debug level */
#define IP_VS_DBG(level, msg, ...)					\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Rate limited KERN_DEBUG print, independent of the debug level */
#define IP_VS_DBG_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__);	\
	} while (0)
/* Let protocol @pp dump the packet when @level <= the debug level */
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
/* Rate limited variant of IP_VS_DBG_PKT() */
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)			\
	do {								\
		if ((level) <= ip_vs_get_debug_level() &&		\
		    net_ratelimit())					\
			(pp)->debug_packet(af, pp, skb, ofs, msg);	\
	} while (0)
268#else	/* NO DEBUGGING at ALL */
/* Without CONFIG_IP_VS_DEBUG every debug macro expands to an empty
 * statement; the arguments are discarded and never evaluated.
 */
#define IP_VS_DBG_BUF(level, msg...)  do {} while (0)
#define IP_VS_ERR_BUF(msg...)  do {} while (0)
#define IP_VS_DBG(level, msg...)  do {} while (0)
#define IP_VS_DBG_RL(msg...)  do {} while (0)
#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg)	do {} while (0)
275#endif
276
/* Available regardless of CONFIG_IP_VS_DEBUG */
#define IP_VS_BUG() BUG()
/* pr_err() gated by net_ratelimit() */
#define IP_VS_ERR_RL(msg, ...)						\
	do {								\
		if (net_ratelimit())					\
			pr_err(msg, ##__VA_ARGS__);			\
	} while (0)
283
284#ifdef CONFIG_IP_VS_DEBUG
/* Function entry/exit tracing, printed at KERN_DEBUG when @level <= the
 * current debug level.  @level is parenthesised so that any expression can
 * be passed.
 */
#define EnterFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Enter: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
#define LeaveFunction(level)						\
	do {								\
		if ((level) <= ip_vs_get_debug_level())			\
			printk(KERN_DEBUG				\
			       pr_fmt("Leave: %s, %s line %i\n"),	\
			       __func__, __FILE__, __LINE__);		\
	} while (0)
299#else
/* Tracing compiled out: expands to an empty statement, @level is unused */
#define EnterFunction(level)   do {} while (0)
#define LeaveFunction(level)   do {} while (0)
302#endif
303
/* The FTP control (21) and data (20) port numbers, in network order. */
#define FTPPORT  cpu_to_be16(21)
#define FTPDATA  cpu_to_be16(20)
307
/* TCP State Values
 * IP_VS_TCP_S_LAST is not a state; it equals the number of states above.
 */
enum {
	IP_VS_TCP_S_NONE = 0,
	IP_VS_TCP_S_ESTABLISHED,
	IP_VS_TCP_S_SYN_SENT,
	IP_VS_TCP_S_SYN_RECV,
	IP_VS_TCP_S_FIN_WAIT,
	IP_VS_TCP_S_TIME_WAIT,
	IP_VS_TCP_S_CLOSE,
	IP_VS_TCP_S_CLOSE_WAIT,
	IP_VS_TCP_S_LAST_ACK,
	IP_VS_TCP_S_LISTEN,
	IP_VS_TCP_S_SYNACK,
	IP_VS_TCP_S_LAST
};
323
/* UDP State Values (a single state; _LAST is the state count) */
enum {
	IP_VS_UDP_S_NORMAL,
	IP_VS_UDP_S_LAST,
};
329
/* ICMP State Values (a single state; _LAST is the state count) */
enum {
	IP_VS_ICMP_S_NORMAL,
	IP_VS_ICMP_S_LAST,
};
335
/* SCTP State Values
 * IP_VS_SCTP_S_LAST is not a state; it equals the number of states above.
 */
enum ip_vs_sctp_states {
	IP_VS_SCTP_S_NONE,
	IP_VS_SCTP_S_INIT1,
	IP_VS_SCTP_S_INIT,
	IP_VS_SCTP_S_COOKIE_SENT,
	IP_VS_SCTP_S_COOKIE_REPLIED,
	IP_VS_SCTP_S_COOKIE_WAIT,
	IP_VS_SCTP_S_COOKIE,
	IP_VS_SCTP_S_COOKIE_ECHOED,
	IP_VS_SCTP_S_ESTABLISHED,
	IP_VS_SCTP_S_SHUTDOWN_SENT,
	IP_VS_SCTP_S_SHUTDOWN_RECEIVED,
	IP_VS_SCTP_S_SHUTDOWN_ACK_SENT,
	IP_VS_SCTP_S_REJECTED,
	IP_VS_SCTP_S_CLOSED,
	IP_VS_SCTP_S_LAST
};
354
/* Delta sequence info structure
 * Each ip_vs_conn has two of these: one for input and one for output
 * sequence changes (see in_seq/out_seq in struct ip_vs_conn).
 * Only used in VS/NAT.
 */
struct ip_vs_seq {
	__u32			init_seq;	/* Add delta from this seq */
	__u32			delta;		/* Delta in sequence numbers */
	__u32			previous_delta;	/* Delta in sequence numbers
						 * before last resized pkt */
};
365
/* counters per cpu (embedded in struct ip_vs_cpu_stats) */
struct ip_vs_counters {
	__u64		conns;		/* connections scheduled */
	__u64		inpkts;		/* incoming packets */
	__u64		outpkts;	/* outgoing packets */
	__u64		inbytes;	/* incoming bytes */
	__u64		outbytes;	/* outgoing bytes */
};
/* Stats per cpu: the counters plus their u64_stats_sync.
 * NOTE(review): syncp presumably guards reads/updates of cnt — confirm
 * against the users in ip_vs_core.c / ip_vs_est.c.
 */
struct ip_vs_cpu_stats {
	struct ip_vs_counters   cnt;
	struct u64_stats_sync   syncp;
};
379
/* IPVS statistics objects: rate estimator state.
 * NOTE(review): the last_* members look like counter snapshots from the
 * previous estimation run and the remaining members like the derived
 * rates (same names as the rate fields of struct ip_vs_kstats) — confirm
 * against ip_vs_est.c.
 */
struct ip_vs_estimator {
	struct list_head	list;

	u64			last_inbytes;
	u64			last_outbytes;
	u64			last_conns;
	u64			last_inpkts;
	u64			last_outpkts;

	u64			cps;
	u64			inpps;
	u64			outpps;
	u64			inbps;
	u64			outbps;
};
396
/*
 * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user:
 * five cumulative counters followed by five current rates.
 */
struct ip_vs_kstats {
	u64			conns;		/* connections scheduled */
	u64			inpkts;		/* incoming packets */
	u64			outpkts;	/* outgoing packets */
	u64			inbytes;	/* incoming bytes */
	u64			outbytes;	/* outgoing bytes */

	u64			cps;		/* current connection rate */
	u64			inpps;		/* current in packet rate */
	u64			outpps;		/* current out packet rate */
	u64			inbps;		/* current in byte rate */
	u64			outbps;		/* current out byte rate */
};
413
/* Statistics container; embedded in struct ip_vs_service, struct ip_vs_dest
 * and struct netns_ipvs (tot_stats).
 */
struct ip_vs_stats {
	struct ip_vs_kstats	kstats;		/* kernel statistics */
	struct ip_vs_estimator	est;		/* estimator */
	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
	spinlock_t		lock;		/* spin lock */
	struct ip_vs_kstats	kstats0;	/* reset values */
};
421
422struct dst_entry;
423struct iphdr;
424struct ip_vs_conn;
425struct ip_vs_app;
426struct sk_buff;
427struct ip_vs_proto_data;
428
/* Description of one transport protocol handled by IPVS: identification
 * fields followed by the callbacks the core invokes for it.
 * The per-netns counterpart is struct ip_vs_proto_data.
 */
struct ip_vs_protocol {
	struct ip_vs_protocol	*next;
	char			*name;
	u16			protocol;
	u16			num_states;
	int			dont_defrag;

	/* global setup/teardown of the protocol */
	void (*init)(struct ip_vs_protocol *pp);

	void (*exit)(struct ip_vs_protocol *pp);

	/* per network namespace setup/teardown */
	int (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);

	void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);

	int (*conn_schedule)(int af, struct sk_buff *skb,
			     struct ip_vs_proto_data *pd,
			     int *verdict, struct ip_vs_conn **cpp,
			     struct ip_vs_iphdr *iph);

	/* connection lookup for the two packet directions */
	struct ip_vs_conn *
	(*conn_in_get)(int af,
		       const struct sk_buff *skb,
		       const struct ip_vs_iphdr *iph,
		       int inverse);

	struct ip_vs_conn *
	(*conn_out_get)(int af,
			const struct sk_buff *skb,
			const struct ip_vs_iphdr *iph,
			int inverse);

	int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);

	int (*csum_check)(int af, struct sk_buff *skb,
			  struct ip_vs_protocol *pp);

	const char *(*state_name)(int state);

	void (*state_transition)(struct ip_vs_conn *cp, int direction,
				 const struct sk_buff *skb,
				 struct ip_vs_proto_data *pd);

	int (*register_app)(struct net *net, struct ip_vs_app *inc);

	void (*unregister_app)(struct net *net, struct ip_vs_app *inc);

	int (*app_conn_bind)(struct ip_vs_conn *cp);

	/* used by IP_VS_DBG_PKT() / IP_VS_DBG_RL_PKT() */
	void (*debug_packet)(int af, struct ip_vs_protocol *pp,
			     const struct sk_buff *skb,
			     int offset,
			     const char *msg);

	void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
489
/* protocol data per netns; entries are chained through ->next and kept in
 * netns_ipvs::proto_data_table
 */
struct ip_vs_proto_data {
	struct ip_vs_proto_data	*next;
	struct ip_vs_protocol	*pp;		/* protocol this data is for */
	int			*timeout_table;	/* protocol timeout table */
	atomic_t		appcnt;		/* counter of proto app incs. */
	struct tcp_states_t	*tcp_state_table;
};
498
499struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
500struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
501					      unsigned short proto);
502
/* Bundle of parameters identifying a connection: namespace, client and
 * virtual address/port, protocol, address family and optional persistence
 * engine data.  Passed to the ip_vs_pe callbacks.
 */
struct ip_vs_conn_param {
	struct net			*net;
	const union nf_inet_addr	*caddr;	/* client address */
	const union nf_inet_addr	*vaddr;	/* virtual address */
	__be16				cport;
	__be16				vport;
	__u16				protocol;
	u16				af;

	const struct ip_vs_pe		*pe;
	char				*pe_data;
	__u8				pe_data_len;
};
516
/* IP_VS structure allocated for each dynamically scheduled connection */
struct ip_vs_conn {
	struct hlist_node	c_list;         /* hashed list heads */
	/* Protocol, addresses and port numbers */
	__be16                  cport;
	__be16                  dport;
	__be16                  vport;
	u16			af;		/* address family */
	union nf_inet_addr      caddr;          /* client address */
	union nf_inet_addr      vaddr;          /* virtual address */
	union nf_inet_addr      daddr;          /* destination address */
	volatile __u32          flags;          /* status flags */
	__u16                   protocol;       /* Which protocol (TCP/UDP) */
	__u16			daf;		/* Address family of the dest */
#ifdef CONFIG_NET_NS
	/* access through ip_vs_conn_net() / ip_vs_conn_net_set() */
	struct net              *net;           /* Name space */
#endif

	/* counter and timer */
	atomic_t		refcnt;		/* reference count */
	struct timer_list	timer;		/* Expiration timer */
	volatile unsigned long	timeout;	/* timeout */

	/* Flags and state transition */
	spinlock_t              lock;           /* lock for state transition */
	volatile __u16          state;          /* state info */
	volatile __u16          old_state;      /* old state, to be used for
						 * state transition triggered
						 * synchronization
						 */
	__u32			fwmark;		/* Firewall mark from skb */
	unsigned long		sync_endtime;	/* jiffies + sent_retries */

	/* Control members */
	struct ip_vs_conn       *control;       /* Master control connection */
	atomic_t                n_control;      /* Number of controlled ones */
	struct ip_vs_dest       *dest;          /* real server */
	atomic_t                in_pkts;        /* incoming packet counter */

	/* Packet transmitter for different forwarding methods.  If it
	 * mangles the packet, it must return NF_DROP or better NF_STOLEN,
	 * otherwise this must be changed to a sk_buff **.
	 * NF_ACCEPT can be returned when destination is local.
	 */
	int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp,
			   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);

	/* Note: we can group the following members into a structure,
	 * in order to save more space, and the following members are
	 * only used in VS/NAT anyway
	 */
	struct ip_vs_app        *app;           /* bound ip_vs_app object */
	void                    *app_data;      /* Application private data */
	struct ip_vs_seq        in_seq;         /* incoming seq. struct */
	struct ip_vs_seq        out_seq;        /* outgoing seq. struct */

	/* persistence engine and its private data */
	const struct ip_vs_pe	*pe;
	char			*pe_data;
	__u8			pe_data_len;

	struct rcu_head		rcu_head;
};
579
580/* To save some memory in conn table when name space is disabled. */
581static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
582{
583#ifdef CONFIG_NET_NS
584	return cp->net;
585#else
586	return &init_net;
587#endif
588}
589
/* Record @net as the namespace of @cp; a no-op without CONFIG_NET_NS. */
static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
{
#ifdef CONFIG_NET_NS
	struct net **slot = &cp->net;

	*slot = net;
#endif
}
596
/* Return 1 when @cp belongs to @net, 0 otherwise.  Without CONFIG_NET_NS
 * there is a single namespace, so the result is always 1.
 */
static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
				    struct net *net)
{
#ifndef CONFIG_NET_NS
	return 1;
#else
	return net == cp->net;
#endif
}
606
/* Extended internal versions of struct ip_vs_service_user and ip_vs_dest_user
 * for IPv6 support.
 *
 * We need these to conveniently pass around service and destination
 * options, but unfortunately, we also need to keep the old definitions to
 * maintain userspace backwards compatibility for the setsockopt interface.
 */
struct ip_vs_service_user_kern {
	/* virtual service addresses */
	u16			af;
	u16			protocol;
	union nf_inet_addr	addr;		/* virtual ip address */
	__be16			port;
	u32			fwmark;		/* firewall mark of service */

	/* virtual service options */
	char			*sched_name;
	char			*pe_name;
	unsigned int		flags;		/* virtual service flags */
	unsigned int		timeout;	/* persistent timeout in sec */
	__be32			netmask;	/* persistent netmask or plen */
};
629
630
/* Kernel-internal description of a destination (real server), able to hold
 * IPv4 or IPv6 addresses.
 */
struct ip_vs_dest_user_kern {
	/* destination server address */
	union nf_inet_addr	addr;
	__be16			port;

	/* real server options */
	unsigned int		conn_flags;	/* connection flags */
	int			weight;		/* destination weight */

	/* thresholds for active connections */
	u32			u_threshold;	/* upper threshold */
	u32			l_threshold;	/* lower threshold */

	/* Address family of addr */
	u16			af;
};
647
648
/*
 * The information about the virtual service offered to the net and the
 * forwarding entries.
 *
 * A service is linked into two tables (s_list, f_list), owns the list of
 * its destinations and points to the bound scheduler and, optionally,
 * persistence engine.
 */
struct ip_vs_service {
	struct hlist_node	s_list;   /* for normal service table */
	struct hlist_node	f_list;   /* for fwmark-based service table */
	atomic_t		refcnt;   /* reference counter */

	u16			af;       /* address family */
	__u16			protocol; /* which protocol (TCP/UDP) */
	union nf_inet_addr	addr;	  /* IP address for virtual service */
	__be16			port;	  /* port number for the service */
	__u32                   fwmark;   /* firewall mark of the service */
	unsigned int		flags;	  /* service status flags */
	unsigned int		timeout;  /* persistent timeout in ticks */
	__be32			netmask;  /* grouping granularity, mask/plen */
	struct net		*net;	  /* owning network namespace */

	struct list_head	destinations;  /* real server d-linked list */
	__u32			num_dests;     /* number of servers */
	struct ip_vs_stats      stats;         /* statistics for the service */

	/* for scheduling */
	struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
	spinlock_t		sched_lock;    /* lock sched_data */
	void			*sched_data;   /* scheduler application data */

	/* alternate persistence engine */
	struct ip_vs_pe __rcu	*pe;

	struct rcu_head		rcu_head;
};
682
/* Information for cached dst; referenced from struct ip_vs_dest through the
 * __rcu pointer dest_dst
 */
struct ip_vs_dest_dst {
	struct dst_entry	*dst_cache;	/* destination cache entry */
	u32			dst_cookie;
	union nf_inet_addr	dst_saddr;
	struct rcu_head		rcu_head;
};
690
/* The real server destination forwarding entry with ip address, port number,
 * and so on.
 *
 * Besides its own address it keeps a copy of the identity of the virtual
 * service it belongs to (protocol, vaddr, vport, vfwmark).
 */
struct ip_vs_dest {
	struct list_head	n_list;   /* for the dests in the service */
	struct hlist_node	d_list;   /* for table with all the dests */

	u16			af;		/* address family */
	__be16			port;		/* port number of the server */
	union nf_inet_addr	addr;		/* IP address of the server */
	volatile unsigned int	flags;		/* dest status flags */
	atomic_t		conn_flags;	/* flags to copy to conn */
	atomic_t		weight;		/* server weight */

	atomic_t		refcnt;		/* reference counter */
	struct ip_vs_stats      stats;          /* statistics */
	unsigned long		idle_start;	/* start time, jiffies */

	/* connection counters and thresholds */
	atomic_t		activeconns;	/* active connections */
	atomic_t		inactconns;	/* inactive connections */
	atomic_t		persistconns;	/* persistent connections */
	__u32			u_threshold;	/* upper threshold */
	__u32			l_threshold;	/* lower threshold */

	/* for destination cache */
	spinlock_t		dst_lock;	/* lock of dst_cache */
	struct ip_vs_dest_dst __rcu *dest_dst;	/* cached dst info */

	/* for virtual service */
	struct ip_vs_service __rcu *svc;	/* service it belongs to */
	__u16			protocol;	/* which protocol (TCP/UDP) */
	__be16			vport;		/* virtual port number */
	union nf_inet_addr	vaddr;		/* virtual IP address */
	__u32			vfwmark;	/* firewall mark of service */

	struct list_head	t_list;		/* in dest_trash */
	unsigned int		in_rs_table:1;	/* we are in rs_table */
};
730
/* The scheduler object: identification plus the callbacks a scheduling
 * algorithm provides.  Bound to a service via ip_vs_service::scheduler.
 */
struct ip_vs_scheduler {
	struct list_head	n_list;		/* d-linked list head */
	char			*name;		/* scheduler name */
	atomic_t		refcnt;		/* reference counter */
	struct module		*module;	/* THIS_MODULE/NULL */

	/* scheduler initializing service */
	int (*init_service)(struct ip_vs_service *svc);
	/* scheduling service finish */
	void (*done_service)(struct ip_vs_service *svc);
	/* dest is linked */
	int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is unlinked */
	int (*del_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
	/* dest is updated */
	int (*upd_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);

	/* selecting a server from the given service */
	struct ip_vs_dest* (*schedule)(struct ip_vs_service *svc,
				       const struct sk_buff *skb,
				       struct ip_vs_iphdr *iph);
};
754
/* The persistence engine object; bound to a service via ip_vs_service::pe
 * and referenced from struct ip_vs_conn and struct ip_vs_conn_param.
 */
struct ip_vs_pe {
	struct list_head	n_list;		/* d-linked list head */
	char			*name;		/* persistence engine name */
	atomic_t		refcnt;		/* reference counter */
	struct module		*module;	/* THIS_MODULE/NULL */

	/* get the connection template, if any */
	int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb);
	/* does connection template @ct match the parameters in @p? */
	bool (*ct_match)(const struct ip_vs_conn_param *p,
			 struct ip_vs_conn *ct);
	u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval,
			   bool inverse);
	int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
};
770
/* The application module object (a.k.a. app incarnation)
 *
 * The same structure describes both a registered application and its
 * incarnations: an application keeps its incarnations on incs_list, and
 * an incarnation points back to its application through ->app.
 */
struct ip_vs_app {
	struct list_head	a_list;		/* member in app list */
	int			type;		/* IP_VS_APP_TYPE_xxx */
	char			*name;		/* application module name */
	__u16			protocol;
	struct module		*module;	/* THIS_MODULE/NULL */
	struct list_head	incs_list;	/* list of incarnations */

	/* members for application incarnations */
	struct list_head	p_list;		/* member in proto app list */
	struct ip_vs_app	*app;		/* its real application */
	__be16			port;		/* port number in net order */
	atomic_t		usecnt;		/* usage counter */
	struct rcu_head		rcu_head;

	/* output hook: Process packet in inout direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
	int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *,
		       struct sk_buff *, int *diff);

	/* input hook: Process packet in outin direction, diff set for TCP.
	 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok,
	 *	   2=Mangled but checksum was not updated
	 */
	int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *,
		      struct sk_buff *, int *diff);

	/* ip_vs_app initializer */
	int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *);

	/* ip_vs_app finish */
	int (*done_conn)(struct ip_vs_app *, struct ip_vs_conn *);


	/* not used now */
	int (*bind_conn)(struct ip_vs_app *, struct ip_vs_conn *,
			 struct ip_vs_protocol *);

	void (*unbind_conn)(struct ip_vs_app *, struct ip_vs_conn *);

	int *			timeout_table;
	int *			timeouts;
	int			timeouts_size;

	int (*conn_schedule)(struct sk_buff *skb, struct ip_vs_app *app,
			     int *verdict, struct ip_vs_conn **cpp);

	struct ip_vs_conn *
	(*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app,
		       const struct iphdr *iph, int inverse);

	struct ip_vs_conn *
	(*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app,
			const struct iphdr *iph, int inverse);

	int (*state_transition)(struct ip_vs_conn *cp, int direction,
				const struct sk_buff *skb,
				struct ip_vs_app *app);

	void (*timeout_change)(struct ip_vs_app *app, int flags);
};
835
/* State of one master sync instance: its queue of sync buffers, the buffer
 * currently in use, the master thread and the work item used to wake it.
 * netns_ipvs::ms points at these.
 */
struct ipvs_master_sync_state {
	struct list_head	sync_queue;
	struct ip_vs_sync_buff	*sync_buff;
	unsigned long		sync_queue_len;
	unsigned int		sync_queue_delay;
	struct task_struct	*master_thread;
	struct delayed_work	master_wakeup_work;
	struct netns_ipvs	*ipvs;		/* back pointer to the netns state */
};
845
/* How much time to keep dests in trash: 120 seconds, expressed in jiffies */
#define IP_VS_DEST_TRASH_PERIOD		(120 * HZ)
848
/* IPVS in network namespace
 *
 * All per-namespace IPVS state lives here; it is reached from a struct net
 * through net_ipvs().  The table-size macros are defined next to the
 * arrays they dimension.
 */
struct netns_ipvs {
	int			gen;		/* Generation */
	int			enable;		/* enable like nf_hooks do */
	/* Hash table: for real service lookups */
	#define IP_VS_RTAB_BITS 4
	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

	struct hlist_head	rs_table[IP_VS_RTAB_SIZE];
	/* ip_vs_app */
	struct list_head	app_list;
	/* ip_vs_proto */
	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
	/* ip_vs_proto_tcp */
#ifdef CONFIG_IP_VS_PROTO_TCP
	#define	TCP_APP_TAB_BITS	4
	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
#endif
	/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
	#define	UDP_APP_TAB_BITS	4
	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
#endif
	/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
	#define SCTP_APP_TAB_BITS	4
	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
	/* Hash table for SCTP application incarnations	 */
	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
#endif
	/* ip_vs_conn */
	atomic_t		conn_count;      /* connection counter */

	/* ip_vs_ctl */
	struct ip_vs_stats		tot_stats;  /* Statistics & est. */

	int			num_services;    /* no of virtual services */

	/* Trash for destinations */
	struct list_head	dest_trash;
	spinlock_t		dest_trash_lock;
	struct timer_list	dest_trash_timer; /* expiration timer */
	/* Service counters */
	atomic_t		ftpsvc_counter;
	atomic_t		nullsvc_counter;

#ifdef CONFIG_SYSCTL
	/* 1/rate drop and drop-entry variables */
	struct delayed_work	defense_work;   /* Work handler */
	int			drop_rate;
	int			drop_counter;
	atomic_t		dropentry;
	/* locks in ctl.c */
	spinlock_t		dropentry_lock;  /* drop entry handling */
	spinlock_t		droppacket_lock; /* drop packet handling */
	spinlock_t		securetcp_lock;  /* state and timeout tables */

	/* sys-ctl struct */
	struct ctl_table_header	*sysctl_hdr;
	struct ctl_table	*sysctl_tbl;
#endif

	/* sysctl variables; several are read through the sysctl_*() inline
	 * accessors that follow this structure
	 */
	int			sysctl_amemthresh;
	int			sysctl_am_droprate;
	int			sysctl_drop_entry;
	int			sysctl_drop_packet;
	int			sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
	int			sysctl_conntrack;
#endif
	int			sysctl_snat_reroute;
	int			sysctl_sync_ver;
	int			sysctl_sync_ports;
	int			sysctl_sync_persist_mode;
	unsigned long		sysctl_sync_qlen_max;
	int			sysctl_sync_sock_size;
	int			sysctl_cache_bypass;
	int			sysctl_expire_nodest_conn;
	int			sysctl_sloppy_tcp;
	int			sysctl_sloppy_sctp;
	int			sysctl_expire_quiescent_template;
	/* [0] is read by sysctl_sync_threshold(), [1] by sysctl_sync_period() */
	int			sysctl_sync_threshold[2];
	unsigned int		sysctl_sync_refresh_period;
	int			sysctl_sync_retries;
	int			sysctl_nat_icmp_send;
	int			sysctl_pmtu_disc;
	int			sysctl_backup_only;
	int			sysctl_conn_reuse_mode;

	/* ip_vs_lblc */
	int			sysctl_lblc_expiration;
	struct ctl_table_header	*lblc_ctl_header;
	struct ctl_table	*lblc_ctl_table;
	/* ip_vs_lblcr */
	int			sysctl_lblcr_expiration;
	struct ctl_table_header	*lblcr_ctl_header;
	struct ctl_table	*lblcr_ctl_table;
	/* ip_vs_est */
	struct list_head	est_list;	/* estimator list */
	spinlock_t		est_lock;
	struct timer_list	est_timer;	/* Estimation timer */
	/* ip_vs_sync */
	spinlock_t		sync_lock;
	struct ipvs_master_sync_state *ms;
	spinlock_t		sync_buff_lock;
	struct task_struct	**backup_threads;
	int			threads_mask;
	int			send_mesg_maxlen;
	int			recv_mesg_maxlen;
	volatile int		sync_state;
	volatile int		master_syncid;
	volatile int		backup_syncid;
	struct mutex		sync_mutex;
	/* multicast interface name */
	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
	/* net name space ptr */
	struct net		*net;            /* Needed by timer routines */
	/* Number of heterogeneous destinations, needed because heterogeneous
	 * destinations are not supported when synchronization is enabled.
	 */
	unsigned int		mixed_address_family_dests;
};
980
/* Default values and limits for the connection synchronization settings.
 * The DEFAULT_* values are what the sysctl_*() accessors return when
 * CONFIG_SYSCTL is not set (visible for sysctl_sync_threshold()).
 */
#define DEFAULT_SYNC_THRESHOLD	3
#define DEFAULT_SYNC_PERIOD	50
#define DEFAULT_SYNC_VER	1
#define DEFAULT_SLOPPY_TCP	0
#define DEFAULT_SLOPPY_SCTP	0
#define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
#define DEFAULT_SYNC_RETRIES		0
#define IPVS_SYNC_WAKEUP_RATE	8
#define IPVS_SYNC_QLEN_MAX	(IPVS_SYNC_WAKEUP_RATE * 4)
#define IPVS_SYNC_SEND_DELAY	(HZ / 50)	/* 1/50 second, in jiffies */
#define IPVS_SYNC_CHECK_PERIOD	HZ		/* one second, in jiffies */
#define IPVS_SYNC_FLUSH_TIME	(HZ * 2)	/* two seconds, in jiffies */
#define IPVS_SYNC_PORTS_MAX	(1 << 6)
994
995#ifdef CONFIG_SYSCTL
996
997static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
998{
999	return ipvs->sysctl_sync_threshold[0];
1000}
1001
1002static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1003{
1004	return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
1005}
1006
1007static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1008{
1009	return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
1010}
1011
1012static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1013{
1014	return ipvs->sysctl_sync_retries;
1015}
1016
1017static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1018{
1019	return ipvs->sysctl_sync_ver;
1020}
1021
1022static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1023{
1024	return ipvs->sysctl_sloppy_tcp;
1025}
1026
1027static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1028{
1029	return ipvs->sysctl_sloppy_sctp;
1030}
1031
1032static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
1033{
1034	return ACCESS_ONCE(ipvs->sysctl_sync_ports);
1035}
1036
1037static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
1038{
1039	return ipvs->sysctl_sync_persist_mode;
1040}
1041
1042static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1043{
1044	return ipvs->sysctl_sync_qlen_max;
1045}
1046
1047static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
1048{
1049	return ipvs->sysctl_sync_sock_size;
1050}
1051
1052static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
1053{
1054	return ipvs->sysctl_pmtu_disc;
1055}
1056
1057static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
1058{
1059	return ipvs->sync_state & IP_VS_STATE_BACKUP &&
1060	       ipvs->sysctl_backup_only;
1061}
1062
1063static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
1064{
1065	return ipvs->sysctl_conn_reuse_mode;
1066}
1067
1068#else
1069
1070static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
1071{
1072	return DEFAULT_SYNC_THRESHOLD;
1073}
1074
1075static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
1076{
1077	return DEFAULT_SYNC_PERIOD;
1078}
1079
1080static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
1081{
1082	return DEFAULT_SYNC_REFRESH_PERIOD;
1083}
1084
1085static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
1086{
1087	return DEFAULT_SYNC_RETRIES & 3;
1088}
1089
1090static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
1091{
1092	return DEFAULT_SYNC_VER;
1093}
1094
1095static inline int sysctl_sloppy_tcp(struct netns_ipvs *ipvs)
1096{
1097	return DEFAULT_SLOPPY_TCP;
1098}
1099
1100static inline int sysctl_sloppy_sctp(struct netns_ipvs *ipvs)
1101{
1102	return DEFAULT_SLOPPY_SCTP;
1103}
1104
/* CONFIG_SYSCTL disabled: always 1; @ipvs is unused. */
static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 1;
}
1109
/* CONFIG_SYSCTL disabled: always 0; @ipvs is unused. */
static inline int sysctl_sync_persist_mode(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 0;
}
1114
1115static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
1116{
1117	return IPVS_SYNC_QLEN_MAX;
1118}
1119
/* CONFIG_SYSCTL disabled: always 0; @ipvs is unused. */
static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 0;
}
1124
/* CONFIG_SYSCTL disabled: always 1; @ipvs is unused. */
static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 1;
}
1129
/* CONFIG_SYSCTL disabled: always 0; @ipvs is unused. */
static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 0;
}
1134
/* CONFIG_SYSCTL disabled: always 1; @ipvs is unused. */
static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 1;
}
1139
1140#endif
1141
/* IPVS core functions
 * (from ip_vs_core.c)
 */
const char *ip_vs_proto_name(unsigned int proto);
void ip_vs_init_hash_table(struct list_head *table, int rows);
/* Wrapper that passes ARRAY_SIZE(t) as the row count, so @t must be a real
 * array (not a pointer) for the computed size to be meaningful.
 */
#define IP_VS_INIT_HASH_TABLE(t) ip_vs_init_hash_table((t), ARRAY_SIZE((t)))

/* NOTE(review): presumably the type id used by the FTP application
 * helper - confirm against its users.
 */
#define IP_VS_APP_TYPE_FTP	1
1150
1151/* ip_vs_conn handling functions
1152 * (from ip_vs_conn.c)
1153 */
/* Direction selectors, numbered from 0. IP_VS_DIR_LAST equals the number
 * of directions listed before it.
 */
enum {
	IP_VS_DIR_INPUT		= 0,
	IP_VS_DIR_OUTPUT	= 1,
	IP_VS_DIR_INPUT_ONLY	= 2,
	IP_VS_DIR_LAST		= 3,
};
1160
1161static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
1162					 const union nf_inet_addr *caddr,
1163					 __be16 cport,
1164					 const union nf_inet_addr *vaddr,
1165					 __be16 vport,
1166					 struct ip_vs_conn_param *p)
1167{
1168	p->net = net;
1169	p->af = af;
1170	p->protocol = protocol;
1171	p->caddr = caddr;
1172	p->cport = cport;
1173	p->vaddr = vaddr;
1174	p->vport = vport;
1175	p->pe = NULL;
1176	p->pe_data = NULL;
1177}
1178
/* Connection lookup prototypes. The *_get() forms take a
 * struct ip_vs_conn_param; the *_get_proto() forms take the address
 * family, the skb, a parsed struct ip_vs_iphdr and an @inverse flag.
 * NOTE(review): the in/out/ct naming presumably distinguishes incoming,
 * outgoing and template lookups - confirm in ip_vs_conn.c.
 */
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);

struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
					    const struct ip_vs_iphdr *iph,
					    int inverse);

struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);

struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
					     const struct ip_vs_iphdr *iph,
					     int inverse);
1191
1192/* Get reference to gain full access to conn.
1193 * By default, RCU read-side critical sections have access only to
1194 * conn fields and its PE data, see ip_vs_conn_rcu_free() for reference.
1195 */
1196static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
1197{
1198	return atomic_inc_not_zero(&cp->refcnt);
1199}
1200
/* put back the conn without restarting its timer
 *
 * Drops one reference by decrementing cp->refcnt; nothing is freed here.
 * smp_mb__before_atomic() is issued first.
 * NOTE(review): presumably so that the caller's earlier accesses to @cp
 * are ordered before the count drops - confirm against the expiry path.
 */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
{
	smp_mb__before_atomic();
	atomic_dec(&cp->refcnt);
}
/* Remaining connection prototypes; the implementations are not part of
 * this header.
 * NOTE(review): ip_vs_conn_put() is presumably the timer-restarting
 * counterpart of __ip_vs_conn_put() - confirm in ip_vs_conn.c.
 */
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);

struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
				  const union nf_inet_addr *daddr,
				  __be16 dport, unsigned int flags,
				  struct ip_vs_dest *dest, __u32 fwmark);
void ip_vs_conn_expire_now(struct ip_vs_conn *cp);

const char *ip_vs_state_name(__u16 proto, int state);

void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
int ip_vs_check_template(struct ip_vs_conn *ct);
void ip_vs_random_dropentry(struct net *net);
/* Global (not per-netns) init/cleanup: both take no arguments */
int ip_vs_conn_init(void);
void ip_vs_conn_cleanup(void);
1223
1224static inline void ip_vs_control_del(struct ip_vs_conn *cp)
1225{
1226	struct ip_vs_conn *ctl_cp = cp->control;
1227	if (!ctl_cp) {
1228		IP_VS_ERR_BUF("request control DEL for uncontrolled: "
1229			      "%s:%d to %s:%d\n",
1230			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1231			      ntohs(cp->cport),
1232			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1233			      ntohs(cp->vport));
1234
1235		return;
1236	}
1237
1238	IP_VS_DBG_BUF(7, "DELeting control for: "
1239		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1240		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1241		      ntohs(cp->cport),
1242		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1243		      ntohs(ctl_cp->cport));
1244
1245	cp->control = NULL;
1246	if (atomic_read(&ctl_cp->n_control) == 0) {
1247		IP_VS_ERR_BUF("BUG control DEL with n=0 : "
1248			      "%s:%d to %s:%d\n",
1249			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1250			      ntohs(cp->cport),
1251			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1252			      ntohs(cp->vport));
1253
1254		return;
1255	}
1256	atomic_dec(&ctl_cp->n_control);
1257}
1258
1259static inline void
1260ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
1261{
1262	if (cp->control) {
1263		IP_VS_ERR_BUF("request control ADD for already controlled: "
1264			      "%s:%d to %s:%d\n",
1265			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1266			      ntohs(cp->cport),
1267			      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
1268			      ntohs(cp->vport));
1269
1270		ip_vs_control_del(cp);
1271	}
1272
1273	IP_VS_DBG_BUF(7, "ADDing control for: "
1274		      "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
1275		      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
1276		      ntohs(cp->cport),
1277		      IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
1278		      ntohs(ctl_cp->cport));
1279
1280	cp->control = ctl_cp;
1281	atomic_inc(&ctl_cp->n_control);
1282}
1283
/* IPVS netns init & cleanup functions
 *
 * One init/cleanup pair per subsystem, each taking the struct net being
 * set up or torn down. The init functions return int.
 * NOTE(review): presumably 0 on success and a negative errno on failure -
 * confirm in the implementations. ip_vs_service_net_cleanup() has no
 * matching init declared here.
 */
int ip_vs_estimator_net_init(struct net *net);
int ip_vs_control_net_init(struct net *net);
int ip_vs_protocol_net_init(struct net *net);
int ip_vs_app_net_init(struct net *net);
int ip_vs_conn_net_init(struct net *net);
int ip_vs_sync_net_init(struct net *net);
void ip_vs_conn_net_cleanup(struct net *net);
void ip_vs_app_net_cleanup(struct net *net);
void ip_vs_protocol_net_cleanup(struct net *net);
void ip_vs_control_net_cleanup(struct net *net);
void ip_vs_estimator_net_cleanup(struct net *net);
void ip_vs_sync_net_cleanup(struct net *net);
void ip_vs_service_net_cleanup(struct net *net);
1298
/* IPVS application functions
 * (from ip_vs_app.c)
 */
#define IP_VS_APP_MAX_PORTS  8
/* Application (un)registration is per struct net */
struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app);
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
void ip_vs_unbind_app(struct ip_vs_conn *cp);
int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
			   __u16 port);
int ip_vs_app_inc_get(struct ip_vs_app *inc);
void ip_vs_app_inc_put(struct ip_vs_app *inc);

int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);

/* struct ip_vs_pe registration and lookup by name; these take no
 * struct net argument.
 * NOTE(review): "pe" presumably stands for persistence engine - confirm.
 */
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
1319
/* Use a #define to avoid all of module.h just for these trivial ops.
 *
 * ip_vs_pe_get(pe) takes a reference on pe->module and ip_vs_pe_put(pe)
 * drops one; both do nothing when @pe is NULL or has no module.
 *
 * Each macro is a single statement (do { } while (0)), so it is safe in
 * an unbraced if/else; the caller supplies the terminating semicolon.
 * @pe is evaluated more than once, so it must have no side effects.
 */
#define ip_vs_pe_get(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			__module_get((pe)->module);	\
	} while (0)

#define ip_vs_pe_put(pe)				\
	do {						\
		if ((pe) && (pe)->module)		\
			module_put((pe)->module);	\
	} while (0)
1328
/* IPVS protocol functions (from ip_vs_proto.c) */
int ip_vs_protocol_init(void);
void ip_vs_protocol_cleanup(void);
void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
/* Timeout tables are plain int arrays; @size/@num describe the table
 * passed in.
 * NOTE(review): units of @size and of the timeout value @to are not
 * visible here - confirm in ip_vs_proto.c.
 */
int *ip_vs_create_timeout_table(int *table, int size);
int ip_vs_set_state_timeout(int *table, int num, const char *const *names,
			    const char *name, int to);
void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			       const struct sk_buff *skb, int offset,
			       const char *msg);

/* Protocol handler objects, defined outside this header */
extern struct ip_vs_protocol ip_vs_protocol_tcp;
extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;
extern struct ip_vs_protocol ip_vs_protocol_sctp;
1346
/* Registering/unregistering scheduler functions
 * (from ip_vs_sched.c)
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
/* Attach/detach a scheduler to/from a service */
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler);
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
			    struct ip_vs_scheduler *sched);
/* Look up a scheduler by name / release it.
 * NOTE(review): presumably get/put manage a module reference - confirm.
 */
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
/* Returns a connection for @skb on @svc; @ignored is an output flag.
 * NOTE(review): exact meaning of *@ignored and of a NULL return is not
 * visible here - confirm in the implementation.
 */
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
	       struct ip_vs_proto_data *pd, int *ignored,
	       struct ip_vs_iphdr *iph);
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph);

void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
1366
/* IPVS control data and functions (from ip_vs_ctl.c) */
extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver;

/* Service lookup within @net, keyed by address family, fwmark, protocol
 * and virtual address/port.
 * NOTE(review): precedence between @fwmark and the address/port key is
 * not visible here - confirm in ip_vs_ctl.c.
 */
struct ip_vs_service *
ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport);

bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
			    const union nf_inet_addr *daddr, __be16 dport);

int ip_vs_use_count_inc(void);
void ip_vs_use_count_dec(void);
int ip_vs_register_nl_ioctl(void);
void ip_vs_unregister_nl_ioctl(void);
int ip_vs_control_init(void);
void ip_vs_control_cleanup(void);
/* Destination lookup; takes separate address families for the service
 * (@svc_af) and the destination (@dest_af).
 */
struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
		const union nf_inet_addr *daddr, __be16 dport,
		const union nf_inet_addr *vaddr, __be16 vport,
		__u16 protocol, __u32 fwmark, __u32 flags);
void ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1390
/* Take a reference on @dest by incrementing dest->refcnt. Unconditional:
 * the current count is not checked.
 */
static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
{
	atomic_inc(&dest->refcnt);
}
1395
/* Drop a reference on @dest by decrementing dest->refcnt; never frees
 * @dest (see ip_vs_dest_put_and_free() for the freeing variant).
 * smp_mb__before_atomic() is issued before the decrement.
 * NOTE(review): presumably to order the caller's earlier accesses to
 * @dest before the count drops - confirm.
 */
static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
{
	smp_mb__before_atomic();
	atomic_dec(&dest->refcnt);
}
1401
1402static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
1403{
1404	if (atomic_dec_return(&dest->refcnt) < 0)
1405		kfree(dest);
1406}
1407
/* IPVS sync daemon data and function prototypes
 * (from ip_vs_sync.c)
 *
 * NOTE(review): @state is presumably one of the IP_VS_STATE_* values
 * (IP_VS_STATE_BACKUP is tested against sync_state elsewhere in this
 * header) - confirm.
 */
int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid);
int stop_sync_thread(struct net *net, int state);
void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);

/* IPVS rate estimator prototypes (from ip_vs_est.c)
 *
 * start/stop take the struct net in addition to the stats object;
 * zero/read operate on the stats object alone.
 */
void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
1420
/* Various IPVS packet transmitters (from ip_vs_xmit.c)
 *
 * All plain transmitters share one signature (skb, connection, protocol,
 * parsed IP header); the ICMP variants additionally take an offset and
 * the netfilter hook number.
 * NOTE(review): the int return is presumably a netfilter verdict -
 * confirm in ip_vs_xmit.c.
 */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		    struct ip_vs_protocol *pp, int offset,
		    unsigned int hooknum, struct ip_vs_iphdr *iph);
void ip_vs_dest_dst_rcu_free(struct rcu_head *head);

/* IPv6 counterparts, declared only when CONFIG_IP_VS_IPV6 is set. There
 * is no _v6 form of ip_vs_null_xmit().
 */
#ifdef CONFIG_IP_VS_IPV6
int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		      struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
			 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph);
int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		       struct ip_vs_protocol *pp, int offset,
		       unsigned int hooknum, struct ip_vs_iphdr *iph);
#endif
1450
1451#ifdef CONFIG_SYSCTL
1452/* This is a simple mechanism to ignore packets when
1453 * we are loaded. Just set ip_vs_drop_rate to 'n' and
1454 * we start to drop 1/rate of the packets
1455 */
1456static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
1457{
1458	if (!ipvs->drop_rate)
1459		return 0;
1460	if (--ipvs->drop_counter > 0)
1461		return 0;
1462	ipvs->drop_counter = ipvs->drop_rate;
1463	return 1;
1464}
1465#else
/* CONFIG_SYSCTL disabled: never ask for a drop; @ipvs is unused. */
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 0;
}
1467#endif
1468
/* ip_vs_fwd_tag returns the forwarding tag of the connection.
 * IP_VS_FWD_METHOD(cp) yields the bits of cp->flags selected by
 * IP_VS_CONN_F_FWD_MASK, which is the value ip_vs_fwd_tag() switches on.
 * The argument is parenthesized so that expressions such as &conn or
 * base + i can be passed safely.
 */
#define IP_VS_FWD_METHOD(cp)  ((cp)->flags & IP_VS_CONN_F_FWD_MASK)
1471
1472static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
1473{
1474	char fwd;
1475
1476	switch (IP_VS_FWD_METHOD(cp)) {
1477	case IP_VS_CONN_F_MASQ:
1478		fwd = 'M'; break;
1479	case IP_VS_CONN_F_LOCALNODE:
1480		fwd = 'L'; break;
1481	case IP_VS_CONN_F_TUNNEL:
1482		fwd = 'T'; break;
1483	case IP_VS_CONN_F_DROUTE:
1484		fwd = 'R'; break;
1485	case IP_VS_CONN_F_BYPASS:
1486		fwd = 'B'; break;
1487	default:
1488		fwd = '?'; break;
1489	}
1490	return fwd;
1491}
1492
/* ICMP NAT helpers; the _v6 form is declared only when
 * CONFIG_IP_VS_IPV6 is set.
 * NOTE(review): @dir is presumably a direction flag (in vs. out) -
 * confirm in the implementation.
 */
void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
		    struct ip_vs_conn *cp, int dir);

#ifdef CONFIG_IP_VS_IPV6
void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
		       struct ip_vs_conn *cp, int dir);
#endif

/* NOTE(review): presumably checksums @skb starting at @offset and
 * returns the folded result - confirm in the implementation.
 */
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
1502
1503static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
1504{
1505	__be32 diff[2] = { ~old, new };
1506
1507	return csum_partial(diff, sizeof(diff), oldsum);
1508}
1509
1510#ifdef CONFIG_IP_VS_IPV6
1511static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
1512					__wsum oldsum)
1513{
1514	__be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
1515			    new[3],  new[2],  new[1],  new[0] };
1516
1517	return csum_partial(diff, sizeof(diff), oldsum);
1518}
1519#endif
1520
1521static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
1522{
1523	__be16 diff[2] = { ~old, new };
1524
1525	return csum_partial(diff, sizeof(diff), oldsum);
1526}
1527
/* Forget current conntrack (unconfirmed) and attach notrack entry.
 *
 * Does nothing when conntrack is not configured, or when @skb already
 * carries the untracked conntrack. Otherwise the reference held through
 * skb->nfct is released, the untracked entry is installed with
 * IP_CT_NEW, and a reference is taken on it.
 */
static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	/* already untracked: nothing to replace */
	if (ct && nf_ct_is_untracked(ct))
		return;

	nf_conntrack_put(skb->nfct);
	skb->nfct = &nf_ct_untracked_get()->ct_general;
	skb->nfctinfo = IP_CT_NEW;
	nf_conntrack_get(skb->nfct);
#endif
}
1543
1544#ifdef CONFIG_IP_VS_NFCT
1545/* Netfilter connection tracking
1546 * (from ip_vs_nfct.c)
1547 */
/* Return @ipvs->sysctl_conntrack when CONFIG_SYSCTL is set; without
 * CONFIG_SYSCTL the result is always 0 and @ipvs is not dereferenced.
 */
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	int enabled = 0;

#ifdef CONFIG_SYSCTL
	enabled = ipvs->sysctl_conntrack;
#endif
	return enabled;
}
1556
/* Real conntrack integration, declared only when CONFIG_IP_VS_NFCT is
 * set; the #else branch supplies inline no-op stubs for most of these.
 * ip_vs_nfct_expect_related() has no stub in the #else branch.
 */
void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
			    int outin);
int ip_vs_confirm_conntrack(struct sk_buff *skb);
void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
			       struct ip_vs_conn *cp, u_int8_t proto,
			       const __be16 port, int from_rs);
void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
1564
1565#else
1566
/* CONFIG_IP_VS_NFCT disabled: conntrack is never enabled; @ipvs is unused. */
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
	(void)ipvs;

	return 0;
}
1571
/* CONFIG_IP_VS_NFCT disabled: no-op stub, no argument is touched. */
static inline void ip_vs_update_conntrack(struct sk_buff *skb,
					  struct ip_vs_conn *cp, int outin)
{
	(void)skb;
	(void)cp;
	(void)outin;
}
1576
1577static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
1578{
1579	return NF_ACCEPT;
1580}
1581
/* CONFIG_IP_VS_NFCT disabled: no-op stub, @cp is not touched. */
static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
{
	(void)cp;
}
1585#endif /* CONFIG_IP_VS_NFCT */
1586
1587static inline int
1588ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
1589{
1590	/* We think the overhead of processing active connections is 256
1591	 * times higher than that of inactive connections in average. (This
1592	 * 256 times might not be accurate, we will change it later) We
1593	 * use the following formula to estimate the overhead now:
1594	 *		  dest->activeconns*256 + dest->inactconns
1595	 */
1596	return (atomic_read(&dest->activeconns) << 8) +
1597		atomic_read(&dest->inactconns);
1598}
1599
1600#endif	/* _NET_IP_VS_H */
1601