1/*
2 * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/list.h>
14#include <linux/skbuff.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nfnetlink.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables_core.h>
20#include <net/netfilter/nf_tables.h>
21#include <net/net_namespace.h>
22#include <net/sock.h>
23
/* File-local list head. Nothing in this part of the file adds to or walks
 * it; presumably expression types are linked here elsewhere -- TODO confirm.
 */
static LIST_HEAD(nf_tables_expressions);
25
/**
 *	nft_register_afinfo - register nf_tables address family info
 *
 *	@net: network namespace whose nft.af_info list receives the family
 *	@afi: address family info to register
 *
 *	Initialises the family's list of tables and appends the family to the
 *	namespace's address family list while holding the nf_tables nfnetlink
 *	subsystem lock. This implementation always returns zero.
 */
int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
{
	/* Start with an empty table list before the family becomes visible. */
	INIT_LIST_HEAD(&afi->tables);
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_add_tail_rcu(&afi->list, &net->nft.af_info);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
	return 0;
}
EXPORT_SYMBOL_GPL(nft_register_afinfo);
43
/**
 *	nft_unregister_afinfo - unregister nf_tables address family info
 *
 *	@afi: address family info to unregister
 *
 *	Removes the family from the list it was registered on, using the RCU
 *	list removal primitive under the nf_tables nfnetlink subsystem lock.
 *	Tables still linked on @afi->tables are not touched here.
 */
void nft_unregister_afinfo(struct nft_af_info *afi)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_del_rcu(&afi->list);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
58
59static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
60{
61	struct nft_af_info *afi;
62
63	list_for_each_entry(afi, &net->nft.af_info, list) {
64		if (afi->family == family)
65			return afi;
66	}
67	return NULL;
68}
69
70static struct nft_af_info *
71nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
72{
73	struct nft_af_info *afi;
74
75	afi = nft_afinfo_lookup(net, family);
76	if (afi != NULL)
77		return afi;
78#ifdef CONFIG_MODULES
79	if (autoload) {
80		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
81		request_module("nft-afinfo-%u", family);
82		nfnl_lock(NFNL_SUBSYS_NFTABLES);
83		afi = nft_afinfo_lookup(net, family);
84		if (afi != NULL)
85			return ERR_PTR(-EAGAIN);
86	}
87#endif
88	return ERR_PTR(-EAFNOSUPPORT);
89}
90
91static void nft_ctx_init(struct nft_ctx *ctx,
92			 const struct sk_buff *skb,
93			 const struct nlmsghdr *nlh,
94			 struct nft_af_info *afi,
95			 struct nft_table *table,
96			 struct nft_chain *chain,
97			 const struct nlattr * const *nla)
98{
99	ctx->net	= sock_net(skb->sk);
100	ctx->afi	= afi;
101	ctx->table	= table;
102	ctx->chain	= chain;
103	ctx->nla   	= nla;
104	ctx->portid	= NETLINK_CB(skb).portid;
105	ctx->report	= nlmsg_report(nlh);
106	ctx->seq	= nlh->nlmsg_seq;
107}
108
109static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
110					 u32 size)
111{
112	struct nft_trans *trans;
113
114	trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
115	if (trans == NULL)
116		return NULL;
117
118	trans->msg_type = msg_type;
119	trans->ctx	= *ctx;
120
121	return trans;
122}
123
/* Unlink @trans from the list it is queued on and free it. list_del() is
 * called unconditionally, so @trans->list must hold valid list pointers.
 */
static void nft_trans_destroy(struct nft_trans *trans)
{
	list_del(&trans->list);
	kfree(trans);
}
129
130static void nf_tables_unregister_hooks(const struct nft_table *table,
131				       const struct nft_chain *chain,
132				       unsigned int hook_nops)
133{
134	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
135	    chain->flags & NFT_BASE_CHAIN)
136		nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops);
137}
138
/* Internal table flag (bit 15 of table->flags). It is set on a table when a
 * NFT_MSG_NEWTABLE transaction is queued for it; lookups in this file
 * answer -ENOENT for tables that carry it.
 */
#define NFT_TABLE_INACTIVE	(1 << 15)
141
142static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
143{
144	struct nft_trans *trans;
145
146	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
147	if (trans == NULL)
148		return -ENOMEM;
149
150	if (msg_type == NFT_MSG_NEWTABLE)
151		ctx->table->flags |= NFT_TABLE_INACTIVE;
152
153	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
154	return 0;
155}
156
157static int nft_deltable(struct nft_ctx *ctx)
158{
159	int err;
160
161	err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE);
162	if (err < 0)
163		return err;
164
165	list_del_rcu(&ctx->table->list);
166	return err;
167}
168
169static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
170{
171	struct nft_trans *trans;
172
173	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
174	if (trans == NULL)
175		return -ENOMEM;
176
177	if (msg_type == NFT_MSG_NEWCHAIN)
178		ctx->chain->flags |= NFT_CHAIN_INACTIVE;
179
180	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
181	return 0;
182}
183
184static int nft_delchain(struct nft_ctx *ctx)
185{
186	int err;
187
188	err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
189	if (err < 0)
190		return err;
191
192	ctx->table->use--;
193	list_del_rcu(&ctx->chain->list);
194
195	return err;
196}
197
198static inline bool
199nft_rule_is_active(struct net *net, const struct nft_rule *rule)
200{
201	return (rule->genmask & nft_genmask_cur(net)) == 0;
202}
203
204static inline int
205nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
206{
207	return (rule->genmask & nft_genmask_next(net)) == 0;
208}
209
/* Overwrite the rule's genmask with the current generation mask. Per the
 * checks in nft_rule_is_active(), that makes the rule inactive in the
 * current generation.
 */
static inline void
nft_rule_activate_next(struct net *net, struct nft_rule *rule)
{
	/* Now inactive, will be active in the future */
	rule->genmask = nft_genmask_cur(net);
}
216
/* Overwrite the rule's genmask with the next generation mask, so that
 * nft_rule_is_active_next() reports the rule as inactive.
 */
static inline void
nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
{
	rule->genmask = nft_genmask_next(net);
}
222
/* Clear the next generation mask bits from the rule's genmask, leaving any
 * other bits untouched.
 */
static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
{
	rule->genmask &= ~nft_genmask_next(net);
}
227
228static int
229nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
230{
231	/* You cannot delete the same rule twice */
232	if (nft_rule_is_active_next(ctx->net, rule)) {
233		nft_rule_deactivate_next(ctx->net, rule);
234		ctx->chain->use--;
235		return 0;
236	}
237	return -ENOENT;
238}
239
240static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
241					    struct nft_rule *rule)
242{
243	struct nft_trans *trans;
244
245	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
246	if (trans == NULL)
247		return NULL;
248
249	nft_trans_rule(trans) = rule;
250	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
251
252	return trans;
253}
254
255static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
256{
257	struct nft_trans *trans;
258	int err;
259
260	trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule);
261	if (trans == NULL)
262		return -ENOMEM;
263
264	err = nf_tables_delrule_deactivate(ctx, rule);
265	if (err < 0) {
266		nft_trans_destroy(trans);
267		return err;
268	}
269
270	return 0;
271}
272
273static int nft_delrule_by_chain(struct nft_ctx *ctx)
274{
275	struct nft_rule *rule;
276	int err;
277
278	list_for_each_entry(rule, &ctx->chain->rules, list) {
279		err = nft_delrule(ctx, rule);
280		if (err < 0)
281			return err;
282	}
283	return 0;
284}
285
/* Internal set flag (bit 15 of set->flags). It is set on a set when a
 * NFT_MSG_NEWSET transaction carrying an NFTA_SET_ID attribute is queued
 * for it.
 */
#define NFT_SET_INACTIVE	(1 << 15)
288
289static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
290			     struct nft_set *set)
291{
292	struct nft_trans *trans;
293
294	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
295	if (trans == NULL)
296		return -ENOMEM;
297
298	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
299		nft_trans_set_id(trans) =
300			ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
301		set->flags |= NFT_SET_INACTIVE;
302	}
303	nft_trans_set(trans) = set;
304	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
305
306	return 0;
307}
308
309static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
310{
311	int err;
312
313	err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
314	if (err < 0)
315		return err;
316
317	list_del_rcu(&set->list);
318	ctx->table->use--;
319
320	return err;
321}
322
323/*
324 * Tables
325 */
326
327static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
328					  const struct nlattr *nla)
329{
330	struct nft_table *table;
331
332	list_for_each_entry(table, &afi->tables, list) {
333		if (!nla_strcmp(nla, table->name))
334			return table;
335	}
336	return NULL;
337}
338
339static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
340						const struct nlattr *nla)
341{
342	struct nft_table *table;
343
344	if (nla == NULL)
345		return ERR_PTR(-EINVAL);
346
347	table = nft_table_lookup(afi, nla);
348	if (table != NULL)
349		return table;
350
351	return ERR_PTR(-ENOENT);
352}
353
354static inline u64 nf_tables_alloc_handle(struct nft_table *table)
355{
356	return ++table->hgenerator;
357}
358
/* Registered chain types, indexed by address family and then by chain type.
 * A NULL slot means no type is registered for that pair. Slots are written
 * by nft_register_chain_type() and nft_unregister_chain_type().
 */
static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
360
361static const struct nf_chain_type *
362__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
363{
364	int i;
365
366	for (i = 0; i < NFT_CHAIN_T_MAX; i++) {
367		if (chain_type[family][i] != NULL &&
368		    !nla_strcmp(nla, chain_type[family][i]->name))
369			return chain_type[family][i];
370	}
371	return NULL;
372}
373
374static const struct nf_chain_type *
375nf_tables_chain_type_lookup(const struct nft_af_info *afi,
376			    const struct nlattr *nla,
377			    bool autoload)
378{
379	const struct nf_chain_type *type;
380
381	type = __nf_tables_chain_type_lookup(afi->family, nla);
382	if (type != NULL)
383		return type;
384#ifdef CONFIG_MODULES
385	if (autoload) {
386		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
387		request_module("nft-chain-%u-%.*s", afi->family,
388			       nla_len(nla), (const char *)nla_data(nla));
389		nfnl_lock(NFNL_SUBSYS_NFTABLES);
390		type = __nf_tables_chain_type_lookup(afi->family, nla);
391		if (type != NULL)
392			return ERR_PTR(-EAGAIN);
393	}
394#endif
395	return ERR_PTR(-ENOENT);
396}
397
/* Netlink attribute policy for table attributes: the name is a string of at
 * most NFT_TABLE_MAXNAMELEN - 1 bytes and the flags are a 32-bit value. The
 * place where this policy is attached to a message handler is not visible
 * in this part of the file.
 */
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
				    .len = NFT_TABLE_MAXNAMELEN - 1 },
	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
};
403
404static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
405				     u32 portid, u32 seq, int event, u32 flags,
406				     int family, const struct nft_table *table)
407{
408	struct nlmsghdr *nlh;
409	struct nfgenmsg *nfmsg;
410
411	event |= NFNL_SUBSYS_NFTABLES << 8;
412	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
413	if (nlh == NULL)
414		goto nla_put_failure;
415
416	nfmsg = nlmsg_data(nlh);
417	nfmsg->nfgen_family	= family;
418	nfmsg->version		= NFNETLINK_V0;
419	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
420
421	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
422	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
423	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
424		goto nla_put_failure;
425
426	nlmsg_end(skb, nlh);
427	return 0;
428
429nla_put_failure:
430	nlmsg_trim(skb, nlh);
431	return -1;
432}
433
434static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
435{
436	struct sk_buff *skb;
437	int err;
438
439	if (!ctx->report &&
440	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
441		return 0;
442
443	err = -ENOBUFS;
444	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
445	if (skb == NULL)
446		goto err;
447
448	err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
449					event, 0, ctx->afi->family, ctx->table);
450	if (err < 0) {
451		kfree_skb(skb);
452		goto err;
453	}
454
455	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
456			     ctx->report, GFP_KERNEL);
457err:
458	if (err < 0) {
459		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
460				  err);
461	}
462	return err;
463}
464
/* Netlink dump callback that emits one NFT_MSG_NEWTABLE message per table.
 *
 * Tables of all families are visited, or only those of the family named in
 * the request header when it is not NFPROTO_UNSPEC. cb->args[0] holds the
 * running index of the entry to resume from, so a dump that filled @skb can
 * continue on the next invocation. The lists are walked under
 * rcu_read_lock().
 *
 * Returns skb->len.
 */
static int nf_tables_dump_tables(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	const struct nft_af_info *afi;
	const struct nft_table *table;
	unsigned int idx = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	int family = nfmsg->nfgen_family;

	rcu_read_lock();
	/* Sequence value used by nl_dump_check_consistent() below. */
	cb->seq = net->nft.base_seq;

	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
		if (family != NFPROTO_UNSPEC && family != afi->family)
			continue;

		list_for_each_entry_rcu(table, &afi->tables, list) {
			/* Skip entries already sent by an earlier pass. */
			if (idx < s_idx)
				goto cont;
			/* Past the resume point: clear the remaining args. */
			if (idx > s_idx)
				memset(&cb->args[1], 0,
				       sizeof(cb->args) - sizeof(cb->args[0]));
			/* Stop when the message could not be added to skb. */
			if (nf_tables_fill_table_info(skb, net,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      NFT_MSG_NEWTABLE,
						      NLM_F_MULTI,
						      afi->family, table) < 0)
				goto done;

			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
	}
done:
	rcu_read_unlock();
	/* Remember where to resume on the next invocation. */
	cb->args[0] = idx;
	return skb->len;
}
506
507static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
508			      const struct nlmsghdr *nlh,
509			      const struct nlattr * const nla[])
510{
511	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
512	const struct nft_af_info *afi;
513	const struct nft_table *table;
514	struct sk_buff *skb2;
515	struct net *net = sock_net(skb->sk);
516	int family = nfmsg->nfgen_family;
517	int err;
518
519	if (nlh->nlmsg_flags & NLM_F_DUMP) {
520		struct netlink_dump_control c = {
521			.dump = nf_tables_dump_tables,
522		};
523		return netlink_dump_start(nlsk, skb, nlh, &c);
524	}
525
526	afi = nf_tables_afinfo_lookup(net, family, false);
527	if (IS_ERR(afi))
528		return PTR_ERR(afi);
529
530	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
531	if (IS_ERR(table))
532		return PTR_ERR(table);
533	if (table->flags & NFT_TABLE_INACTIVE)
534		return -ENOENT;
535
536	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
537	if (!skb2)
538		return -ENOMEM;
539
540	err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
541					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
542					family, table);
543	if (err < 0)
544		goto err;
545
546	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
547
548err:
549	kfree_skb(skb2);
550	return err;
551}
552
553static int nf_tables_table_enable(const struct nft_af_info *afi,
554				  struct nft_table *table)
555{
556	struct nft_chain *chain;
557	int err, i = 0;
558
559	list_for_each_entry(chain, &table->chains, list) {
560		if (!(chain->flags & NFT_BASE_CHAIN))
561			continue;
562
563		err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
564		if (err < 0)
565			goto err;
566
567		i++;
568	}
569	return 0;
570err:
571	list_for_each_entry(chain, &table->chains, list) {
572		if (!(chain->flags & NFT_BASE_CHAIN))
573			continue;
574
575		if (i-- <= 0)
576			break;
577
578		nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
579	}
580	return err;
581}
582
583static void nf_tables_table_disable(const struct nft_af_info *afi,
584				   struct nft_table *table)
585{
586	struct nft_chain *chain;
587
588	list_for_each_entry(chain, &table->chains, list) {
589		if (chain->flags & NFT_BASE_CHAIN)
590			nf_unregister_hooks(nft_base_chain(chain)->ops,
591					    afi->nops);
592	}
593}
594
595static int nf_tables_updtable(struct nft_ctx *ctx)
596{
597	struct nft_trans *trans;
598	u32 flags;
599	int ret = 0;
600
601	if (!ctx->nla[NFTA_TABLE_FLAGS])
602		return 0;
603
604	flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
605	if (flags & ~NFT_TABLE_F_DORMANT)
606		return -EINVAL;
607
608	if (flags == ctx->table->flags)
609		return 0;
610
611	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
612				sizeof(struct nft_trans_table));
613	if (trans == NULL)
614		return -ENOMEM;
615
616	if ((flags & NFT_TABLE_F_DORMANT) &&
617	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
618		nft_trans_table_enable(trans) = false;
619	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
620		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
621		ret = nf_tables_table_enable(ctx->afi, ctx->table);
622		if (ret >= 0) {
623			ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
624			nft_trans_table_enable(trans) = true;
625		}
626	}
627	if (ret < 0)
628		goto err;
629
630	nft_trans_table_update(trans) = true;
631	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
632	return 0;
633err:
634	nft_trans_destroy(trans);
635	return ret;
636}
637
638static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
639			      const struct nlmsghdr *nlh,
640			      const struct nlattr * const nla[])
641{
642	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
643	const struct nlattr *name;
644	struct nft_af_info *afi;
645	struct nft_table *table;
646	struct net *net = sock_net(skb->sk);
647	int family = nfmsg->nfgen_family;
648	u32 flags = 0;
649	struct nft_ctx ctx;
650	int err;
651
652	afi = nf_tables_afinfo_lookup(net, family, true);
653	if (IS_ERR(afi))
654		return PTR_ERR(afi);
655
656	name = nla[NFTA_TABLE_NAME];
657	table = nf_tables_table_lookup(afi, name);
658	if (IS_ERR(table)) {
659		if (PTR_ERR(table) != -ENOENT)
660			return PTR_ERR(table);
661		table = NULL;
662	}
663
664	if (table != NULL) {
665		if (table->flags & NFT_TABLE_INACTIVE)
666			return -ENOENT;
667		if (nlh->nlmsg_flags & NLM_F_EXCL)
668			return -EEXIST;
669		if (nlh->nlmsg_flags & NLM_F_REPLACE)
670			return -EOPNOTSUPP;
671
672		nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
673		return nf_tables_updtable(&ctx);
674	}
675
676	if (nla[NFTA_TABLE_FLAGS]) {
677		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
678		if (flags & ~NFT_TABLE_F_DORMANT)
679			return -EINVAL;
680	}
681
682	if (!try_module_get(afi->owner))
683		return -EAFNOSUPPORT;
684
685	err = -ENOMEM;
686	table = kzalloc(sizeof(*table), GFP_KERNEL);
687	if (table == NULL)
688		goto err1;
689
690	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
691	INIT_LIST_HEAD(&table->chains);
692	INIT_LIST_HEAD(&table->sets);
693	table->flags = flags;
694
695	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
696	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
697	if (err < 0)
698		goto err2;
699
700	list_add_tail_rcu(&table->list, &afi->tables);
701	return 0;
702err2:
703	kfree(table);
704err1:
705	module_put(afi->owner);
706	return err;
707}
708
709static int nft_flush_table(struct nft_ctx *ctx)
710{
711	int err;
712	struct nft_chain *chain, *nc;
713	struct nft_set *set, *ns;
714
715	list_for_each_entry(chain, &ctx->table->chains, list) {
716		ctx->chain = chain;
717
718		err = nft_delrule_by_chain(ctx);
719		if (err < 0)
720			goto out;
721	}
722
723	list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
724		if (set->flags & NFT_SET_ANONYMOUS &&
725		    !list_empty(&set->bindings))
726			continue;
727
728		err = nft_delset(ctx, set);
729		if (err < 0)
730			goto out;
731	}
732
733	list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
734		ctx->chain = chain;
735
736		err = nft_delchain(ctx);
737		if (err < 0)
738			goto out;
739	}
740
741	err = nft_deltable(ctx);
742out:
743	return err;
744}
745
746static int nft_flush(struct nft_ctx *ctx, int family)
747{
748	struct nft_af_info *afi;
749	struct nft_table *table, *nt;
750	const struct nlattr * const *nla = ctx->nla;
751	int err = 0;
752
753	list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
754		if (family != AF_UNSPEC && afi->family != family)
755			continue;
756
757		ctx->afi = afi;
758		list_for_each_entry_safe(table, nt, &afi->tables, list) {
759			if (nla[NFTA_TABLE_NAME] &&
760			    nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
761				continue;
762
763			ctx->table = table;
764
765			err = nft_flush_table(ctx);
766			if (err < 0)
767				goto out;
768		}
769	}
770out:
771	return err;
772}
773
774static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
775			      const struct nlmsghdr *nlh,
776			      const struct nlattr * const nla[])
777{
778	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
779	struct nft_af_info *afi;
780	struct nft_table *table;
781	struct net *net = sock_net(skb->sk);
782	int family = nfmsg->nfgen_family;
783	struct nft_ctx ctx;
784
785	nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
786	if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
787		return nft_flush(&ctx, family);
788
789	afi = nf_tables_afinfo_lookup(net, family, false);
790	if (IS_ERR(afi))
791		return PTR_ERR(afi);
792
793	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
794	if (IS_ERR(table))
795		return PTR_ERR(table);
796	if (table->flags & NFT_TABLE_INACTIVE)
797		return -ENOENT;
798
799	ctx.afi = afi;
800	ctx.table = table;
801
802	return nft_flush_table(&ctx);
803}
804
/* Free the table in @ctx and drop one reference on the owner module of the
 * address family in @ctx. Destroying a table whose use counter is still
 * positive triggers BUG_ON().
 */
static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
	BUG_ON(ctx->table->use > 0);

	kfree(ctx->table);
	module_put(ctx->afi->owner);
}
812
813int nft_register_chain_type(const struct nf_chain_type *ctype)
814{
815	int err = 0;
816
817	nfnl_lock(NFNL_SUBSYS_NFTABLES);
818	if (chain_type[ctype->family][ctype->type] != NULL) {
819		err = -EBUSY;
820		goto out;
821	}
822	chain_type[ctype->family][ctype->type] = ctype;
823out:
824	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
825	return err;
826}
827EXPORT_SYMBOL_GPL(nft_register_chain_type);
828
/* Clear the chain type table slot selected by the family and type of
 * @ctype, under the nf_tables nfnetlink subsystem lock. The slot is reset
 * without checking that it currently holds @ctype.
 */
void nft_unregister_chain_type(const struct nf_chain_type *ctype)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	chain_type[ctype->family][ctype->type] = NULL;
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
836
837/*
838 * Chains
839 */
840
841static struct nft_chain *
842nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
843{
844	struct nft_chain *chain;
845
846	list_for_each_entry(chain, &table->chains, list) {
847		if (chain->handle == handle)
848			return chain;
849	}
850
851	return ERR_PTR(-ENOENT);
852}
853
854static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
855						const struct nlattr *nla)
856{
857	struct nft_chain *chain;
858
859	if (nla == NULL)
860		return ERR_PTR(-EINVAL);
861
862	list_for_each_entry(chain, &table->chains, list) {
863		if (!nla_strcmp(nla, chain->name))
864			return chain;
865	}
866
867	return ERR_PTR(-ENOENT);
868}
869
/* Netlink attribute policy for chain attributes. Only the chain name has a
 * length limit (NFT_CHAIN_MAXNAMELEN - 1 bytes); the table and type strings
 * declare none here. Hook and counters are nested attributes.
 */
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
	[NFTA_CHAIN_TABLE]	= { .type = NLA_STRING },
	[NFTA_CHAIN_HANDLE]	= { .type = NLA_U64 },
	[NFTA_CHAIN_NAME]	= { .type = NLA_STRING,
				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
	[NFTA_CHAIN_POLICY]	= { .type = NLA_U32 },
	[NFTA_CHAIN_TYPE]	= { .type = NLA_STRING },
	[NFTA_CHAIN_COUNTERS]	= { .type = NLA_NESTED },
};
880
/* Netlink attribute policy for hook attributes: hook number and priority
 * are both 32-bit values. Presumably applied to the contents of the nested
 * NFTA_CHAIN_HOOK attribute -- the parse call is not visible in this part
 * of the file.
 */
static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
};
885
886static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
887{
888	struct nft_stats *cpu_stats, total;
889	struct nlattr *nest;
890	unsigned int seq;
891	u64 pkts, bytes;
892	int cpu;
893
894	memset(&total, 0, sizeof(total));
895	for_each_possible_cpu(cpu) {
896		cpu_stats = per_cpu_ptr(stats, cpu);
897		do {
898			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
899			pkts = cpu_stats->pkts;
900			bytes = cpu_stats->bytes;
901		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
902		total.pkts += pkts;
903		total.bytes += bytes;
904	}
905	nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
906	if (nest == NULL)
907		goto nla_put_failure;
908
909	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) ||
910	    nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)))
911		goto nla_put_failure;
912
913	nla_nest_end(skb, nest);
914	return 0;
915
916nla_put_failure:
917	return -ENOSPC;
918}
919
920static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
921				     u32 portid, u32 seq, int event, u32 flags,
922				     int family, const struct nft_table *table,
923				     const struct nft_chain *chain)
924{
925	struct nlmsghdr *nlh;
926	struct nfgenmsg *nfmsg;
927
928	event |= NFNL_SUBSYS_NFTABLES << 8;
929	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
930	if (nlh == NULL)
931		goto nla_put_failure;
932
933	nfmsg = nlmsg_data(nlh);
934	nfmsg->nfgen_family	= family;
935	nfmsg->version		= NFNETLINK_V0;
936	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
937
938	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
939		goto nla_put_failure;
940	if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
941		goto nla_put_failure;
942	if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
943		goto nla_put_failure;
944
945	if (chain->flags & NFT_BASE_CHAIN) {
946		const struct nft_base_chain *basechain = nft_base_chain(chain);
947		const struct nf_hook_ops *ops = &basechain->ops[0];
948		struct nlattr *nest;
949
950		nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
951		if (nest == NULL)
952			goto nla_put_failure;
953		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
954			goto nla_put_failure;
955		if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
956			goto nla_put_failure;
957		nla_nest_end(skb, nest);
958
959		if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
960				 htonl(basechain->policy)))
961			goto nla_put_failure;
962
963		if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
964			goto nla_put_failure;
965
966		if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
967			goto nla_put_failure;
968	}
969
970	if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
971		goto nla_put_failure;
972
973	nlmsg_end(skb, nlh);
974	return 0;
975
976nla_put_failure:
977	nlmsg_trim(skb, nlh);
978	return -1;
979}
980
981static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
982{
983	struct sk_buff *skb;
984	int err;
985
986	if (!ctx->report &&
987	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
988		return 0;
989
990	err = -ENOBUFS;
991	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
992	if (skb == NULL)
993		goto err;
994
995	err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
996					event, 0, ctx->afi->family, ctx->table,
997					ctx->chain);
998	if (err < 0) {
999		kfree_skb(skb);
1000		goto err;
1001	}
1002
1003	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1004			     ctx->report, GFP_KERNEL);
1005err:
1006	if (err < 0) {
1007		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1008				  err);
1009	}
1010	return err;
1011}
1012
/* Netlink dump callback that emits one NFT_MSG_NEWCHAIN message per chain.
 *
 * Chains of all tables of all families are visited, or only those of the
 * family named in the request header when it is not NFPROTO_UNSPEC.
 * cb->args[0] holds the running index of the entry to resume from, so a
 * dump that filled @skb can continue on the next invocation. The lists are
 * walked under rcu_read_lock().
 *
 * Returns skb->len.
 */
static int nf_tables_dump_chains(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	const struct nft_af_info *afi;
	const struct nft_table *table;
	const struct nft_chain *chain;
	unsigned int idx = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	int family = nfmsg->nfgen_family;

	rcu_read_lock();
	/* Sequence value used by nl_dump_check_consistent() below. */
	cb->seq = net->nft.base_seq;

	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
		if (family != NFPROTO_UNSPEC && family != afi->family)
			continue;

		list_for_each_entry_rcu(table, &afi->tables, list) {
			list_for_each_entry_rcu(chain, &table->chains, list) {
				/* Skip entries sent by an earlier pass. */
				if (idx < s_idx)
					goto cont;
				/* Past the resume point: clear other args. */
				if (idx > s_idx)
					memset(&cb->args[1], 0,
					       sizeof(cb->args) - sizeof(cb->args[0]));
				/* Stop when the message did not fit. */
				if (nf_tables_fill_chain_info(skb, net,
							      NETLINK_CB(cb->skb).portid,
							      cb->nlh->nlmsg_seq,
							      NFT_MSG_NEWCHAIN,
							      NLM_F_MULTI,
							      afi->family, table, chain) < 0)
					goto done;

				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
				idx++;
			}
		}
	}
done:
	rcu_read_unlock();
	/* Remember where to resume on the next invocation. */
	cb->args[0] = idx;
	return skb->len;
}
1057
1058static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
1059			      const struct nlmsghdr *nlh,
1060			      const struct nlattr * const nla[])
1061{
1062	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1063	const struct nft_af_info *afi;
1064	const struct nft_table *table;
1065	const struct nft_chain *chain;
1066	struct sk_buff *skb2;
1067	struct net *net = sock_net(skb->sk);
1068	int family = nfmsg->nfgen_family;
1069	int err;
1070
1071	if (nlh->nlmsg_flags & NLM_F_DUMP) {
1072		struct netlink_dump_control c = {
1073			.dump = nf_tables_dump_chains,
1074		};
1075		return netlink_dump_start(nlsk, skb, nlh, &c);
1076	}
1077
1078	afi = nf_tables_afinfo_lookup(net, family, false);
1079	if (IS_ERR(afi))
1080		return PTR_ERR(afi);
1081
1082	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
1083	if (IS_ERR(table))
1084		return PTR_ERR(table);
1085	if (table->flags & NFT_TABLE_INACTIVE)
1086		return -ENOENT;
1087
1088	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
1089	if (IS_ERR(chain))
1090		return PTR_ERR(chain);
1091	if (chain->flags & NFT_CHAIN_INACTIVE)
1092		return -ENOENT;
1093
1094	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1095	if (!skb2)
1096		return -ENOMEM;
1097
1098	err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
1099					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
1100					family, table, chain);
1101	if (err < 0)
1102		goto err;
1103
1104	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
1105
1106err:
1107	kfree_skb(skb2);
1108	return err;
1109}
1110
/* Netlink attribute policy for counter attributes: packet and byte counts
 * are both 64-bit values. Used by nft_stats_alloc() to parse a nested
 * counters attribute.
 */
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
};
1115
1116static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
1117{
1118	struct nlattr *tb[NFTA_COUNTER_MAX+1];
1119	struct nft_stats __percpu *newstats;
1120	struct nft_stats *stats;
1121	int err;
1122
1123	err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
1124	if (err < 0)
1125		return ERR_PTR(err);
1126
1127	if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
1128		return ERR_PTR(-EINVAL);
1129
1130	newstats = netdev_alloc_pcpu_stats(struct nft_stats);
1131	if (newstats == NULL)
1132		return ERR_PTR(-ENOMEM);
1133
1134	/* Restore old counters on this cpu, no problem. Per-cpu statistics
1135	 * are not exposed to userspace.
1136	 */
1137	preempt_disable();
1138	stats = this_cpu_ptr(newstats);
1139	stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
1140	stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
1141	preempt_enable();
1142
1143	return newstats;
1144}
1145
1146static void nft_chain_stats_replace(struct nft_base_chain *chain,
1147				    struct nft_stats __percpu *newstats)
1148{
1149	if (newstats == NULL)
1150		return;
1151
1152	if (chain->stats) {
1153		struct nft_stats __percpu *oldstats =
1154				nft_dereference(chain->stats);
1155
1156		rcu_assign_pointer(chain->stats, newstats);
1157		synchronize_rcu();
1158		free_percpu(oldstats);
1159	} else
1160		rcu_assign_pointer(chain->stats, newstats);
1161}
1162
1163static void nf_tables_chain_destroy(struct nft_chain *chain)
1164{
1165	BUG_ON(chain->use > 0);
1166
1167	if (chain->flags & NFT_BASE_CHAIN) {
1168		module_put(nft_base_chain(chain)->type->owner);
1169		free_percpu(nft_base_chain(chain)->stats);
1170		kfree(nft_base_chain(chain));
1171	} else {
1172		kfree(chain);
1173	}
1174}
1175
1176static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
1177			      const struct nlmsghdr *nlh,
1178			      const struct nlattr * const nla[])
1179{
1180	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1181	const struct nlattr * uninitialized_var(name);
1182	struct nft_af_info *afi;
1183	struct nft_table *table;
1184	struct nft_chain *chain;
1185	struct nft_base_chain *basechain = NULL;
1186	struct nlattr *ha[NFTA_HOOK_MAX + 1];
1187	struct net *net = sock_net(skb->sk);
1188	int family = nfmsg->nfgen_family;
1189	u8 policy = NF_ACCEPT;
1190	u64 handle = 0;
1191	unsigned int i;
1192	struct nft_stats __percpu *stats;
1193	int err;
1194	bool create;
1195	struct nft_ctx ctx;
1196
1197	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
1198
1199	afi = nf_tables_afinfo_lookup(net, family, true);
1200	if (IS_ERR(afi))
1201		return PTR_ERR(afi);
1202
1203	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
1204	if (IS_ERR(table))
1205		return PTR_ERR(table);
1206
1207	chain = NULL;
1208	name = nla[NFTA_CHAIN_NAME];
1209
1210	if (nla[NFTA_CHAIN_HANDLE]) {
1211		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
1212		chain = nf_tables_chain_lookup_byhandle(table, handle);
1213		if (IS_ERR(chain))
1214			return PTR_ERR(chain);
1215	} else {
1216		chain = nf_tables_chain_lookup(table, name);
1217		if (IS_ERR(chain)) {
1218			if (PTR_ERR(chain) != -ENOENT)
1219				return PTR_ERR(chain);
1220			chain = NULL;
1221		}
1222	}
1223
1224	if (nla[NFTA_CHAIN_POLICY]) {
1225		if ((chain != NULL &&
1226		    !(chain->flags & NFT_BASE_CHAIN)))
1227			return -EOPNOTSUPP;
1228
1229		if (chain == NULL &&
1230		    nla[NFTA_CHAIN_HOOK] == NULL)
1231			return -EOPNOTSUPP;
1232
1233		policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
1234		switch (policy) {
1235		case NF_DROP:
1236		case NF_ACCEPT:
1237			break;
1238		default:
1239			return -EINVAL;
1240		}
1241	}
1242
1243	if (chain != NULL) {
1244		struct nft_stats *stats = NULL;
1245		struct nft_trans *trans;
1246
1247		if (chain->flags & NFT_CHAIN_INACTIVE)
1248			return -ENOENT;
1249		if (nlh->nlmsg_flags & NLM_F_EXCL)
1250			return -EEXIST;
1251		if (nlh->nlmsg_flags & NLM_F_REPLACE)
1252			return -EOPNOTSUPP;
1253
1254		if (nla[NFTA_CHAIN_HANDLE] && name &&
1255		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
1256			return -EEXIST;
1257
1258		if (nla[NFTA_CHAIN_COUNTERS]) {
1259			if (!(chain->flags & NFT_BASE_CHAIN))
1260				return -EOPNOTSUPP;
1261
1262			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
1263			if (IS_ERR(stats))
1264				return PTR_ERR(stats);
1265		}
1266
1267		nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1268		trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
1269					sizeof(struct nft_trans_chain));
1270		if (trans == NULL) {
1271			free_percpu(stats);
1272			return -ENOMEM;
1273		}
1274
1275		nft_trans_chain_stats(trans) = stats;
1276		nft_trans_chain_update(trans) = true;
1277
1278		if (nla[NFTA_CHAIN_POLICY])
1279			nft_trans_chain_policy(trans) = policy;
1280		else
1281			nft_trans_chain_policy(trans) = -1;
1282
1283		if (nla[NFTA_CHAIN_HANDLE] && name) {
1284			nla_strlcpy(nft_trans_chain_name(trans), name,
1285				    NFT_CHAIN_MAXNAMELEN);
1286		}
1287		list_add_tail(&trans->list, &net->nft.commit_list);
1288		return 0;
1289	}
1290
1291	if (table->use == UINT_MAX)
1292		return -EOVERFLOW;
1293
1294	if (nla[NFTA_CHAIN_HOOK]) {
1295		const struct nf_chain_type *type;
1296		struct nf_hook_ops *ops;
1297		nf_hookfn *hookfn;
1298		u32 hooknum, priority;
1299
1300		type = chain_type[family][NFT_CHAIN_T_DEFAULT];
1301		if (nla[NFTA_CHAIN_TYPE]) {
1302			type = nf_tables_chain_type_lookup(afi,
1303							   nla[NFTA_CHAIN_TYPE],
1304							   create);
1305			if (IS_ERR(type))
1306				return PTR_ERR(type);
1307		}
1308
1309		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
1310				       nft_hook_policy);
1311		if (err < 0)
1312			return err;
1313		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
1314		    ha[NFTA_HOOK_PRIORITY] == NULL)
1315			return -EINVAL;
1316
1317		hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
1318		if (hooknum >= afi->nhooks)
1319			return -EINVAL;
1320		priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
1321
1322		if (!(type->hook_mask & (1 << hooknum)))
1323			return -EOPNOTSUPP;
1324		if (!try_module_get(type->owner))
1325			return -ENOENT;
1326		hookfn = type->hooks[hooknum];
1327
1328		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
1329		if (basechain == NULL) {
1330			module_put(type->owner);
1331			return -ENOMEM;
1332		}
1333
1334		if (nla[NFTA_CHAIN_COUNTERS]) {
1335			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
1336			if (IS_ERR(stats)) {
1337				module_put(type->owner);
1338				kfree(basechain);
1339				return PTR_ERR(stats);
1340			}
1341			basechain->stats = stats;
1342		} else {
1343			stats = netdev_alloc_pcpu_stats(struct nft_stats);
1344			if (stats == NULL) {
1345				module_put(type->owner);
1346				kfree(basechain);
1347				return -ENOMEM;
1348			}
1349			rcu_assign_pointer(basechain->stats, stats);
1350		}
1351
1352		write_pnet(&basechain->pnet, net);
1353		basechain->type = type;
1354		chain = &basechain->chain;
1355
1356		for (i = 0; i < afi->nops; i++) {
1357			ops = &basechain->ops[i];
1358			ops->pf		= family;
1359			ops->owner	= afi->owner;
1360			ops->hooknum	= hooknum;
1361			ops->priority	= priority;
1362			ops->priv	= chain;
1363			ops->hook	= afi->hooks[ops->hooknum];
1364			if (hookfn)
1365				ops->hook = hookfn;
1366			if (afi->hook_ops_init)
1367				afi->hook_ops_init(ops, i);
1368		}
1369
1370		chain->flags |= NFT_BASE_CHAIN;
1371		basechain->policy = policy;
1372	} else {
1373		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
1374		if (chain == NULL)
1375			return -ENOMEM;
1376	}
1377
1378	INIT_LIST_HEAD(&chain->rules);
1379	chain->handle = nf_tables_alloc_handle(table);
1380	chain->table = table;
1381	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
1382
1383	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
1384	    chain->flags & NFT_BASE_CHAIN) {
1385		err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
1386		if (err < 0)
1387			goto err1;
1388	}
1389
1390	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1391	err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
1392	if (err < 0)
1393		goto err2;
1394
1395	table->use++;
1396	list_add_tail_rcu(&chain->list, &table->chains);
1397	return 0;
1398err2:
1399	nf_tables_unregister_hooks(table, chain, afi->nops);
1400err1:
1401	nf_tables_chain_destroy(chain);
1402	return err;
1403}
1404
1405static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
1406			      const struct nlmsghdr *nlh,
1407			      const struct nlattr * const nla[])
1408{
1409	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1410	struct nft_af_info *afi;
1411	struct nft_table *table;
1412	struct nft_chain *chain;
1413	struct net *net = sock_net(skb->sk);
1414	int family = nfmsg->nfgen_family;
1415	struct nft_ctx ctx;
1416
1417	afi = nf_tables_afinfo_lookup(net, family, false);
1418	if (IS_ERR(afi))
1419		return PTR_ERR(afi);
1420
1421	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
1422	if (IS_ERR(table))
1423		return PTR_ERR(table);
1424	if (table->flags & NFT_TABLE_INACTIVE)
1425		return -ENOENT;
1426
1427	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
1428	if (IS_ERR(chain))
1429		return PTR_ERR(chain);
1430	if (chain->flags & NFT_CHAIN_INACTIVE)
1431		return -ENOENT;
1432	if (chain->use > 0)
1433		return -EBUSY;
1434
1435	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1436
1437	return nft_delchain(&ctx);
1438}
1439
1440/*
1441 * Expressions
1442 */
1443
1444/**
1445 *	nft_register_expr - register nf_tables expr type
1446 *	@ops: expr type
1447 *
1448 *	Registers the expr type for use with nf_tables. Returns zero on
1449 *	success or a negative errno code otherwise.
1450 */
1451int nft_register_expr(struct nft_expr_type *type)
1452{
1453	nfnl_lock(NFNL_SUBSYS_NFTABLES);
1454	if (type->family == NFPROTO_UNSPEC)
1455		list_add_tail_rcu(&type->list, &nf_tables_expressions);
1456	else
1457		list_add_rcu(&type->list, &nf_tables_expressions);
1458	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1459	return 0;
1460}
1461EXPORT_SYMBOL_GPL(nft_register_expr);
1462
/**
 *	nft_unregister_expr - unregister nf_tables expr type
 *	@type: expr type
 *
 *	Unregisters the expr type for use with nf_tables by removing it
 *	from the expression list under the nfnetlink subsystem lock.
 */
void nft_unregister_expr(struct nft_expr_type *type)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_del_rcu(&type->list);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
1476
1477static const struct nft_expr_type *__nft_expr_type_get(u8 family,
1478						       struct nlattr *nla)
1479{
1480	const struct nft_expr_type *type;
1481
1482	list_for_each_entry(type, &nf_tables_expressions, list) {
1483		if (!nla_strcmp(nla, type->name) &&
1484		    (!type->family || type->family == family))
1485			return type;
1486	}
1487	return NULL;
1488}
1489
1490static const struct nft_expr_type *nft_expr_type_get(u8 family,
1491						     struct nlattr *nla)
1492{
1493	const struct nft_expr_type *type;
1494
1495	if (nla == NULL)
1496		return ERR_PTR(-EINVAL);
1497
1498	type = __nft_expr_type_get(family, nla);
1499	if (type != NULL && try_module_get(type->owner))
1500		return type;
1501
1502#ifdef CONFIG_MODULES
1503	if (type == NULL) {
1504		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1505		request_module("nft-expr-%u-%.*s", family,
1506			       nla_len(nla), (char *)nla_data(nla));
1507		nfnl_lock(NFNL_SUBSYS_NFTABLES);
1508		if (__nft_expr_type_get(family, nla))
1509			return ERR_PTR(-EAGAIN);
1510
1511		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1512		request_module("nft-expr-%.*s",
1513			       nla_len(nla), (char *)nla_data(nla));
1514		nfnl_lock(NFNL_SUBSYS_NFTABLES);
1515		if (__nft_expr_type_get(family, nla))
1516			return ERR_PTR(-EAGAIN);
1517	}
1518#endif
1519	return ERR_PTR(-ENOENT);
1520}
1521
/*
 * Netlink attribute policy for a single expression: a string name and a
 * nested, type specific data blob.
 */
static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
	[NFTA_EXPR_NAME]	= { .type = NLA_STRING },
	[NFTA_EXPR_DATA]	= { .type = NLA_NESTED },
};
1526
1527static int nf_tables_fill_expr_info(struct sk_buff *skb,
1528				    const struct nft_expr *expr)
1529{
1530	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
1531		goto nla_put_failure;
1532
1533	if (expr->ops->dump) {
1534		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
1535		if (data == NULL)
1536			goto nla_put_failure;
1537		if (expr->ops->dump(skb, expr) < 0)
1538			goto nla_put_failure;
1539		nla_nest_end(skb, data);
1540	}
1541
1542	return skb->len;
1543
1544nla_put_failure:
1545	return -1;
1546};
1547
1548int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
1549		  const struct nft_expr *expr)
1550{
1551	struct nlattr *nest;
1552
1553	nest = nla_nest_start(skb, attr);
1554	if (!nest)
1555		goto nla_put_failure;
1556	if (nf_tables_fill_expr_info(skb, expr) < 0)
1557		goto nla_put_failure;
1558	nla_nest_end(skb, nest);
1559	return 0;
1560
1561nla_put_failure:
1562	return -1;
1563}
1564
/*
 * Result of parsing one expression attribute: the selected operations and
 * the parsed, type specific attribute table handed to the init callback.
 */
struct nft_expr_info {
	const struct nft_expr_ops	*ops;
	struct nlattr			*tb[NFT_EXPR_MAXATTR + 1];
};
1569
1570static int nf_tables_expr_parse(const struct nft_ctx *ctx,
1571				const struct nlattr *nla,
1572				struct nft_expr_info *info)
1573{
1574	const struct nft_expr_type *type;
1575	const struct nft_expr_ops *ops;
1576	struct nlattr *tb[NFTA_EXPR_MAX + 1];
1577	int err;
1578
1579	err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
1580	if (err < 0)
1581		return err;
1582
1583	type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
1584	if (IS_ERR(type))
1585		return PTR_ERR(type);
1586
1587	if (tb[NFTA_EXPR_DATA]) {
1588		err = nla_parse_nested(info->tb, type->maxattr,
1589				       tb[NFTA_EXPR_DATA], type->policy);
1590		if (err < 0)
1591			goto err1;
1592	} else
1593		memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
1594
1595	if (type->select_ops != NULL) {
1596		ops = type->select_ops(ctx,
1597				       (const struct nlattr * const *)info->tb);
1598		if (IS_ERR(ops)) {
1599			err = PTR_ERR(ops);
1600			goto err1;
1601		}
1602	} else
1603		ops = type->ops;
1604
1605	info->ops = ops;
1606	return 0;
1607
1608err1:
1609	module_put(type->owner);
1610	return err;
1611}
1612
1613static int nf_tables_newexpr(const struct nft_ctx *ctx,
1614			     const struct nft_expr_info *info,
1615			     struct nft_expr *expr)
1616{
1617	const struct nft_expr_ops *ops = info->ops;
1618	int err;
1619
1620	expr->ops = ops;
1621	if (ops->init) {
1622		err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
1623		if (err < 0)
1624			goto err1;
1625	}
1626
1627	return 0;
1628
1629err1:
1630	expr->ops = NULL;
1631	return err;
1632}
1633
/*
 * Tear down an initialized expression: run the optional destroy callback
 * and drop the reference on the module owning the expression type. The
 * memory holding @expr is not freed here.
 */
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
				   struct nft_expr *expr)
{
	if (expr->ops->destroy)
		expr->ops->destroy(ctx, expr);
	module_put(expr->ops->type->owner);
}
1641
1642struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
1643			       const struct nlattr *nla)
1644{
1645	struct nft_expr_info info;
1646	struct nft_expr *expr;
1647	int err;
1648
1649	err = nf_tables_expr_parse(ctx, nla, &info);
1650	if (err < 0)
1651		goto err1;
1652
1653	err = -ENOMEM;
1654	expr = kzalloc(info.ops->size, GFP_KERNEL);
1655	if (expr == NULL)
1656		goto err2;
1657
1658	err = nf_tables_newexpr(ctx, &info, expr);
1659	if (err < 0)
1660		goto err2;
1661
1662	return expr;
1663err2:
1664	module_put(info.ops->type->owner);
1665err1:
1666	return ERR_PTR(err);
1667}
1668
/*
 * Destroy an initialized expression via nf_tables_expr_destroy() and free
 * the memory holding it.
 */
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
{
	nf_tables_expr_destroy(ctx, expr);
	kfree(expr);
}
1674
1675/*
1676 * Rules
1677 */
1678
1679static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
1680						u64 handle)
1681{
1682	struct nft_rule *rule;
1683
1684	// FIXME: this sucks
1685	list_for_each_entry(rule, &chain->rules, list) {
1686		if (handle == rule->handle)
1687			return rule;
1688	}
1689
1690	return ERR_PTR(-ENOENT);
1691}
1692
1693static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
1694					      const struct nlattr *nla)
1695{
1696	if (nla == NULL)
1697		return ERR_PTR(-EINVAL);
1698
1699	return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
1700}
1701
/*
 * Netlink attribute policy for rule messages. The chain name is bounded
 * by NFT_CHAIN_MAXNAMELEN - 1 and the user data blob by
 * NFT_USERDATA_MAXLEN.
 */
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
	[NFTA_RULE_TABLE]	= { .type = NLA_STRING },
	[NFTA_RULE_CHAIN]	= { .type = NLA_STRING,
				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
	[NFTA_RULE_HANDLE]	= { .type = NLA_U64 },
	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
	[NFTA_RULE_COMPAT]	= { .type = NLA_NESTED },
	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
				    .len = NFT_USERDATA_MAXLEN },
};
1713
1714static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
1715				    u32 portid, u32 seq, int event,
1716				    u32 flags, int family,
1717				    const struct nft_table *table,
1718				    const struct nft_chain *chain,
1719				    const struct nft_rule *rule)
1720{
1721	struct nlmsghdr *nlh;
1722	struct nfgenmsg *nfmsg;
1723	const struct nft_expr *expr, *next;
1724	struct nlattr *list;
1725	const struct nft_rule *prule;
1726	int type = event | NFNL_SUBSYS_NFTABLES << 8;
1727
1728	nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg),
1729			flags);
1730	if (nlh == NULL)
1731		goto nla_put_failure;
1732
1733	nfmsg = nlmsg_data(nlh);
1734	nfmsg->nfgen_family	= family;
1735	nfmsg->version		= NFNETLINK_V0;
1736	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
1737
1738	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
1739		goto nla_put_failure;
1740	if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
1741		goto nla_put_failure;
1742	if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
1743		goto nla_put_failure;
1744
1745	if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
1746		prule = list_entry(rule->list.prev, struct nft_rule, list);
1747		if (nla_put_be64(skb, NFTA_RULE_POSITION,
1748				 cpu_to_be64(prule->handle)))
1749			goto nla_put_failure;
1750	}
1751
1752	list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
1753	if (list == NULL)
1754		goto nla_put_failure;
1755	nft_rule_for_each_expr(expr, next, rule) {
1756		if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
1757			goto nla_put_failure;
1758	}
1759	nla_nest_end(skb, list);
1760
1761	if (rule->udata) {
1762		struct nft_userdata *udata = nft_userdata(rule);
1763		if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
1764			    udata->data) < 0)
1765			goto nla_put_failure;
1766	}
1767
1768	nlmsg_end(skb, nlh);
1769	return 0;
1770
1771nla_put_failure:
1772	nlmsg_trim(skb, nlh);
1773	return -1;
1774}
1775
1776static int nf_tables_rule_notify(const struct nft_ctx *ctx,
1777				 const struct nft_rule *rule,
1778				 int event)
1779{
1780	struct sk_buff *skb;
1781	int err;
1782
1783	if (!ctx->report &&
1784	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
1785		return 0;
1786
1787	err = -ENOBUFS;
1788	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
1789	if (skb == NULL)
1790		goto err;
1791
1792	err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
1793				       event, 0, ctx->afi->family, ctx->table,
1794				       ctx->chain, rule);
1795	if (err < 0) {
1796		kfree_skb(skb);
1797		goto err;
1798	}
1799
1800	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1801			     ctx->report, GFP_KERNEL);
1802err:
1803	if (err < 0) {
1804		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1805				  err);
1806	}
1807	return err;
1808}
1809
/*
 * Netlink dump callback emitting one NFT_MSG_NEWRULE message per active
 * rule.
 *
 * Walks every family (or only the requested one), table, chain and rule
 * under rcu_read_lock(). cb->args[0] holds the running rule index at which
 * the next invocation resumes; rules below that index are skipped.
 *
 * Returns skb->len.
 */
static int nf_tables_dump_rules(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	const struct nft_af_info *afi;
	const struct nft_table *table;
	const struct nft_chain *chain;
	const struct nft_rule *rule;
	unsigned int idx = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	int family = nfmsg->nfgen_family;

	rcu_read_lock();
	cb->seq = net->nft.base_seq;

	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
		/* NFPROTO_UNSPEC dumps the rules of every family. */
		if (family != NFPROTO_UNSPEC && family != afi->family)
			continue;

		list_for_each_entry_rcu(table, &afi->tables, list) {
			list_for_each_entry_rcu(chain, &table->chains, list) {
				list_for_each_entry_rcu(rule, &chain->rules, list) {
					/* Skipped rules still advance idx. */
					if (!nft_rule_is_active(net, rule))
						goto cont;
					if (idx < s_idx)
						goto cont;
					/* Past the resume point: clear the
					 * remaining callback arguments.
					 */
					if (idx > s_idx)
						memset(&cb->args[1], 0,
						       sizeof(cb->args) - sizeof(cb->args[0]));
					/* Fill failure ends this pass; idx is
					 * saved so the rule is retried.
					 */
					if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
								      cb->nlh->nlmsg_seq,
								      NFT_MSG_NEWRULE,
								      NLM_F_MULTI | NLM_F_APPEND,
								      afi->family, table, chain, rule) < 0)
						goto done;

					nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
					idx++;
				}
			}
		}
	}
done:
	rcu_read_unlock();

	cb->args[0] = idx;
	return skb->len;
}
1859
1860static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
1861			     const struct nlmsghdr *nlh,
1862			     const struct nlattr * const nla[])
1863{
1864	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1865	const struct nft_af_info *afi;
1866	const struct nft_table *table;
1867	const struct nft_chain *chain;
1868	const struct nft_rule *rule;
1869	struct sk_buff *skb2;
1870	struct net *net = sock_net(skb->sk);
1871	int family = nfmsg->nfgen_family;
1872	int err;
1873
1874	if (nlh->nlmsg_flags & NLM_F_DUMP) {
1875		struct netlink_dump_control c = {
1876			.dump = nf_tables_dump_rules,
1877		};
1878		return netlink_dump_start(nlsk, skb, nlh, &c);
1879	}
1880
1881	afi = nf_tables_afinfo_lookup(net, family, false);
1882	if (IS_ERR(afi))
1883		return PTR_ERR(afi);
1884
1885	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
1886	if (IS_ERR(table))
1887		return PTR_ERR(table);
1888	if (table->flags & NFT_TABLE_INACTIVE)
1889		return -ENOENT;
1890
1891	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
1892	if (IS_ERR(chain))
1893		return PTR_ERR(chain);
1894	if (chain->flags & NFT_CHAIN_INACTIVE)
1895		return -ENOENT;
1896
1897	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
1898	if (IS_ERR(rule))
1899		return PTR_ERR(rule);
1900
1901	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1902	if (!skb2)
1903		return -ENOMEM;
1904
1905	err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
1906				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
1907				       family, table, chain, rule);
1908	if (err < 0)
1909		goto err;
1910
1911	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
1912
1913err:
1914	kfree_skb(skb2);
1915	return err;
1916}
1917
1918static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
1919				   struct nft_rule *rule)
1920{
1921	struct nft_expr *expr;
1922
1923	/*
1924	 * Careful: some expressions might not be initialized in case this
1925	 * is called on error from nf_tables_newrule().
1926	 */
1927	expr = nft_expr_first(rule);
1928	while (expr->ops && expr != nft_expr_last(rule)) {
1929		nf_tables_expr_destroy(ctx, expr);
1930		expr = nft_expr_next(expr);
1931	}
1932	kfree(rule);
1933}
1934
/* Upper bound on the number of expressions accepted in a single rule. */
#define NFT_RULE_MAXEXPRS	128

/*
 * Scratch array of parsed expression descriptors, indexed per expression
 * by nf_tables_newrule().
 * NOTE(review): the allocation is not visible in this part of the file;
 * presumably it holds NFT_RULE_MAXEXPRS entries and, being a single
 * file-scope buffer, relies on callers being serialized -- confirm.
 */
static struct nft_expr_info *info;
1938
/*
 * Netlink handler that adds a new rule to a chain or replaces an existing
 * one.
 *
 * With NFTA_RULE_HANDLE the request must carry NLM_F_REPLACE and the rule
 * with that handle is replaced. Without a handle a new rule is created
 * (NLM_F_CREATE required); NFTA_RULE_POSITION names the existing rule next
 * to which it is inserted, and NLM_F_APPEND selects after/tail instead of
 * before/head.
 *
 * All expressions are first parsed into the file-scope info[] array, then
 * the rule is allocated with room for the expression data and optional
 * user data, the expressions are initialized in place and the rule is
 * linked into the chain and recorded in the transaction log.
 *
 * Returns 0 on success or a negative errno code.
 */
static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
			     const struct nlmsghdr *nlh,
			     const struct nlattr * const nla[])
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	struct nft_af_info *afi;
	struct net *net = sock_net(skb->sk);
	struct nft_table *table;
	struct nft_chain *chain;
	struct nft_rule *rule, *old_rule = NULL;
	struct nft_userdata *udata;
	struct nft_trans *trans = NULL;
	struct nft_expr *expr;
	struct nft_ctx ctx;
	struct nlattr *tmp;
	unsigned int size, i, n, ulen = 0, usize = 0;
	int err, rem;
	bool create;
	u64 handle, pos_handle;

	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;

	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
	if (IS_ERR(afi))
		return PTR_ERR(afi);

	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
	if (IS_ERR(table))
		return PTR_ERR(table);

	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
	if (IS_ERR(chain))
		return PTR_ERR(chain);

	if (nla[NFTA_RULE_HANDLE]) {
		/* A handle refers to an existing rule: only replacement is
		 * supported, and the new rule reuses the handle.
		 */
		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
		rule = __nf_tables_rule_lookup(chain, handle);
		if (IS_ERR(rule))
			return PTR_ERR(rule);

		if (nlh->nlmsg_flags & NLM_F_EXCL)
			return -EEXIST;
		if (nlh->nlmsg_flags & NLM_F_REPLACE)
			old_rule = rule;
		else
			return -EOPNOTSUPP;
	} else {
		if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
			return -EINVAL;
		handle = nf_tables_alloc_handle(table);

		if (chain->use == UINT_MAX)
			return -EOVERFLOW;
	}

	if (nla[NFTA_RULE_POSITION]) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
			return -EOPNOTSUPP;

		/* old_rule becomes the anchor for the insertion below. */
		pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
		old_rule = __nf_tables_rule_lookup(chain, pos_handle);
		if (IS_ERR(old_rule))
			return PTR_ERR(old_rule);
	}

	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);

	/* Pass 1: parse every expression and add up the space they need.
	 * Each successful parse leaves a module reference recorded in
	 * info[n].ops.
	 */
	n = 0;
	size = 0;
	if (nla[NFTA_RULE_EXPRESSIONS]) {
		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
			err = -EINVAL;
			if (nla_type(tmp) != NFTA_LIST_ELEM)
				goto err1;
			if (n == NFT_RULE_MAXEXPRS)
				goto err1;
			err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
			if (err < 0)
				goto err1;
			size += info[n].ops->size;
			n++;
		}
	}
	/* Check for overflow of dlen field */
	err = -EFBIG;
	if (size >= 1 << 12)
		goto err1;

	if (nla[NFTA_RULE_USERDATA]) {
		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
		if (ulen > 0)
			usize = sizeof(struct nft_userdata) + ulen;
	}

	err = -ENOMEM;
	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
	if (rule == NULL)
		goto err1;

	nft_rule_activate_next(net, rule);

	rule->handle = handle;
	rule->dlen   = size;
	rule->udata  = ulen ? 1 : 0;

	if (ulen) {
		/* The stored length is the payload length minus one. */
		udata = nft_userdata(rule);
		udata->len = ulen - 1;
		nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
	}

	/* Pass 2: initialize the expressions in place. Clearing
	 * info[i].ops marks the module reference as owned by the rule, so
	 * the err1 loop below will not drop it a second time.
	 */
	expr = nft_expr_first(rule);
	for (i = 0; i < n; i++) {
		err = nf_tables_newexpr(&ctx, &info[i], expr);
		if (err < 0)
			goto err2;
		info[i].ops = NULL;
		expr = nft_expr_next(expr);
	}

	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
		if (nft_rule_is_active_next(net, old_rule)) {
			/* Log the removal of the old rule and link the new
			 * rule directly in front of it.
			 */
			trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
						   old_rule);
			if (trans == NULL) {
				err = -ENOMEM;
				goto err2;
			}
			nft_rule_deactivate_next(net, old_rule);
			chain->use--;
			list_add_tail_rcu(&rule->list, &old_rule->list);
		} else {
			err = -ENOENT;
			goto err2;
		}
	/* Note: the inner if/else below is unbraced; the final "else {"
	 * pairs with the NLM_F_APPEND test, not with "if (old_rule)".
	 */
	} else if (nlh->nlmsg_flags & NLM_F_APPEND)
		if (old_rule)
			list_add_rcu(&rule->list, &old_rule->list);
		else
			list_add_tail_rcu(&rule->list, &chain->rules);
	else {
		if (old_rule)
			list_add_tail_rcu(&rule->list, &old_rule->list);
		else
			list_add_rcu(&rule->list, &chain->rules);
	}

	if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
		err = -ENOMEM;
		goto err3;
	}
	chain->use++;
	return 0;

err3:
	list_del_rcu(&rule->list);
err2:
	nf_tables_rule_destroy(&ctx, rule);
err1:
	/* Drop the module references of expressions that were parsed but
	 * never handed over to the rule.
	 */
	for (i = 0; i < n; i++) {
		if (info[i].ops != NULL)
			module_put(info[i].ops->type->owner);
	}
	return err;
}
2104
2105static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
2106			     const struct nlmsghdr *nlh,
2107			     const struct nlattr * const nla[])
2108{
2109	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2110	struct nft_af_info *afi;
2111	struct net *net = sock_net(skb->sk);
2112	struct nft_table *table;
2113	struct nft_chain *chain = NULL;
2114	struct nft_rule *rule;
2115	int family = nfmsg->nfgen_family, err = 0;
2116	struct nft_ctx ctx;
2117
2118	afi = nf_tables_afinfo_lookup(net, family, false);
2119	if (IS_ERR(afi))
2120		return PTR_ERR(afi);
2121
2122	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
2123	if (IS_ERR(table))
2124		return PTR_ERR(table);
2125	if (table->flags & NFT_TABLE_INACTIVE)
2126		return -ENOENT;
2127
2128	if (nla[NFTA_RULE_CHAIN]) {
2129		chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
2130		if (IS_ERR(chain))
2131			return PTR_ERR(chain);
2132	}
2133
2134	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
2135
2136	if (chain) {
2137		if (nla[NFTA_RULE_HANDLE]) {
2138			rule = nf_tables_rule_lookup(chain,
2139						     nla[NFTA_RULE_HANDLE]);
2140			if (IS_ERR(rule))
2141				return PTR_ERR(rule);
2142
2143			err = nft_delrule(&ctx, rule);
2144		} else {
2145			err = nft_delrule_by_chain(&ctx);
2146		}
2147	} else {
2148		list_for_each_entry(chain, &table->chains, list) {
2149			ctx.chain = chain;
2150			err = nft_delrule_by_chain(&ctx);
2151			if (err < 0)
2152				break;
2153		}
2154	}
2155
2156	return err;
2157}
2158
2159/*
2160 * Sets
2161 */
2162
/* Registered set implementations, see nft_register_set(). */
static LIST_HEAD(nf_tables_set_ops);
2164
/**
 *	nft_register_set - register nf_tables set implementation
 *	@ops: set operations
 *
 *	Appends the set operations to the list of available set
 *	implementations under the nfnetlink subsystem lock. Always returns
 *	zero.
 */
int nft_register_set(struct nft_set_ops *ops)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
	return 0;
}
EXPORT_SYMBOL_GPL(nft_register_set);
2173
/**
 *	nft_unregister_set - unregister nf_tables set implementation
 *	@ops: set operations
 *
 *	Removes the set operations from the list of available set
 *	implementations under the nfnetlink subsystem lock.
 */
void nft_unregister_set(struct nft_set_ops *ops)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_del_rcu(&ops->list);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);
2181
2182/*
2183 * Select a set implementation based on the data characteristics and the
2184 * given policy. The total memory use might not be known if no size is
2185 * given, in that case the amount of memory per element is used.
2186 */
2187static const struct nft_set_ops *
2188nft_select_set_ops(const struct nlattr * const nla[],
2189		   const struct nft_set_desc *desc,
2190		   enum nft_set_policies policy)
2191{
2192	const struct nft_set_ops *ops, *bops;
2193	struct nft_set_estimate est, best;
2194	u32 features;
2195
2196#ifdef CONFIG_MODULES
2197	if (list_empty(&nf_tables_set_ops)) {
2198		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
2199		request_module("nft-set");
2200		nfnl_lock(NFNL_SUBSYS_NFTABLES);
2201		if (!list_empty(&nf_tables_set_ops))
2202			return ERR_PTR(-EAGAIN);
2203	}
2204#endif
2205	features = 0;
2206	if (nla[NFTA_SET_FLAGS] != NULL) {
2207		features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
2208		features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
2209	}
2210
2211	bops	   = NULL;
2212	best.size  = ~0;
2213	best.class = ~0;
2214
2215	list_for_each_entry(ops, &nf_tables_set_ops, list) {
2216		if ((ops->features & features) != features)
2217			continue;
2218		if (!ops->estimate(desc, features, &est))
2219			continue;
2220
2221		switch (policy) {
2222		case NFT_SET_POL_PERFORMANCE:
2223			if (est.class < best.class)
2224				break;
2225			if (est.class == best.class && est.size < best.size)
2226				break;
2227			continue;
2228		case NFT_SET_POL_MEMORY:
2229			if (est.size < best.size)
2230				break;
2231			if (est.size == best.size && est.class < best.class)
2232				break;
2233			continue;
2234		default:
2235			break;
2236		}
2237
2238		if (!try_module_get(ops->owner))
2239			continue;
2240		if (bops != NULL)
2241			module_put(bops->owner);
2242
2243		bops = ops;
2244		best = est;
2245	}
2246
2247	if (bops != NULL)
2248		return bops;
2249
2250	return ERR_PTR(-EOPNOTSUPP);
2251}
2252
/*
 * Netlink attribute policy for set messages. The set name is limited to
 * IFNAMSIZ - 1 characters.
 */
static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
	[NFTA_SET_TABLE]		= { .type = NLA_STRING },
	[NFTA_SET_NAME]			= { .type = NLA_STRING,
					    .len = IFNAMSIZ - 1 },
	[NFTA_SET_FLAGS]		= { .type = NLA_U32 },
	[NFTA_SET_KEY_TYPE]		= { .type = NLA_U32 },
	[NFTA_SET_KEY_LEN]		= { .type = NLA_U32 },
	[NFTA_SET_DATA_TYPE]		= { .type = NLA_U32 },
	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
	[NFTA_SET_POLICY]		= { .type = NLA_U32 },
	[NFTA_SET_DESC]			= { .type = NLA_NESTED },
	[NFTA_SET_ID]			= { .type = NLA_U32 },
	[NFTA_SET_TIMEOUT]		= { .type = NLA_U64 },
	[NFTA_SET_GC_INTERVAL]		= { .type = NLA_U32 },
};
2268
/* Netlink attribute policy for the nested NFTA_SET_DESC attribute. */
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
	[NFTA_SET_DESC_SIZE]		= { .type = NLA_U32 },
};
2272
2273static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
2274				     const struct sk_buff *skb,
2275				     const struct nlmsghdr *nlh,
2276				     const struct nlattr * const nla[])
2277{
2278	struct net *net = sock_net(skb->sk);
2279	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2280	struct nft_af_info *afi = NULL;
2281	struct nft_table *table = NULL;
2282
2283	if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
2284		afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
2285		if (IS_ERR(afi))
2286			return PTR_ERR(afi);
2287	}
2288
2289	if (nla[NFTA_SET_TABLE] != NULL) {
2290		if (afi == NULL)
2291			return -EAFNOSUPPORT;
2292
2293		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
2294		if (IS_ERR(table))
2295			return PTR_ERR(table);
2296		if (table->flags & NFT_TABLE_INACTIVE)
2297			return -ENOENT;
2298	}
2299
2300	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
2301	return 0;
2302}
2303
2304struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
2305				     const struct nlattr *nla)
2306{
2307	struct nft_set *set;
2308
2309	if (nla == NULL)
2310		return ERR_PTR(-EINVAL);
2311
2312	list_for_each_entry(set, &table->sets, list) {
2313		if (!nla_strcmp(nla, set->name))
2314			return set;
2315	}
2316	return ERR_PTR(-ENOENT);
2317}
2318
2319struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
2320					  const struct nlattr *nla)
2321{
2322	struct nft_trans *trans;
2323	u32 id = ntohl(nla_get_be32(nla));
2324
2325	list_for_each_entry(trans, &net->nft.commit_list, list) {
2326		if (trans->msg_type == NFT_MSG_NEWSET &&
2327		    id == nft_trans_set_id(trans))
2328			return nft_trans_set(trans);
2329	}
2330	return ERR_PTR(-ENOENT);
2331}
2332
/*
 * Derive the final name for @set from the user-supplied @name and store it
 * in set->name.
 *
 * @name may contain a single "%d" conversion; in that case the lowest
 * number not already used by a matching set name in ctx->table is
 * substituted.
 *
 * Returns 0 on success, -EINVAL for any other use of '%', -ENOMEM when the
 * scratch bitmap page cannot be allocated, or -ENFILE when the resulting
 * name is already used by a set in the table.
 */
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
				    const char *name)
{
	const struct nft_set *i;
	const char *p;
	unsigned long *inuse;
	unsigned int n = 0, min = 0;

	/* Look for a conversion within the first IFNAMSIZ bytes of @name. */
	p = strnchr(name, IFNAMSIZ, '%');
	if (p != NULL) {
		/* Only a single "%d" is accepted: @name is used below as the
		 * format string for both sscanf() and snprintf().
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* One zeroed page serves as a bitmap of numbers in use. */
		inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
		if (inuse == NULL)
			return -ENOMEM;
cont:
		/* Mark every number inside the current window
		 * [min, min + BITS_PER_BYTE * PAGE_SIZE) that an existing
		 * set name already occupies.
		 */
		list_for_each_entry(i, &ctx->table->sets, list) {
			int tmp;

			if (!sscanf(i->name, name, &tmp))
				continue;
			if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
				continue;

			set_bit(tmp - min, inuse);
		}

		n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
		if (n >= BITS_PER_BYTE * PAGE_SIZE) {
			/* Window is full: slide it up by one page worth of
			 * bits, clear the bitmap and scan again.
			 */
			min += BITS_PER_BYTE * PAGE_SIZE;
			memset(inuse, 0, PAGE_SIZE);
			goto cont;
		}
		free_page((unsigned long)inuse);
	}

	/* Without a "%d" in @name, min + n is 0 and is not consumed. */
	snprintf(set->name, sizeof(set->name), name, min + n);

	/* Reject the name if another set in the table already carries it. */
	list_for_each_entry(i, &ctx->table->sets, list) {
		if (!strcmp(set->name, i->name))
			return -ENFILE;
	}
	return 0;
}
2377
2378static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
2379			      const struct nft_set *set, u16 event, u16 flags)
2380{
2381	struct nfgenmsg *nfmsg;
2382	struct nlmsghdr *nlh;
2383	struct nlattr *desc;
2384	u32 portid = ctx->portid;
2385	u32 seq = ctx->seq;
2386
2387	event |= NFNL_SUBSYS_NFTABLES << 8;
2388	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
2389			flags);
2390	if (nlh == NULL)
2391		goto nla_put_failure;
2392
2393	nfmsg = nlmsg_data(nlh);
2394	nfmsg->nfgen_family	= ctx->afi->family;
2395	nfmsg->version		= NFNETLINK_V0;
2396	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
2397
2398	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
2399		goto nla_put_failure;
2400	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
2401		goto nla_put_failure;
2402	if (set->flags != 0)
2403		if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
2404			goto nla_put_failure;
2405
2406	if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)))
2407		goto nla_put_failure;
2408	if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
2409		goto nla_put_failure;
2410	if (set->flags & NFT_SET_MAP) {
2411		if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)))
2412			goto nla_put_failure;
2413		if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
2414			goto nla_put_failure;
2415	}
2416
2417	if (set->timeout &&
2418	    nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
2419		goto nla_put_failure;
2420	if (set->gc_int &&
2421	    nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
2422		goto nla_put_failure;
2423
2424	if (set->policy != NFT_SET_POL_PERFORMANCE) {
2425		if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
2426			goto nla_put_failure;
2427	}
2428
2429	desc = nla_nest_start(skb, NFTA_SET_DESC);
2430	if (desc == NULL)
2431		goto nla_put_failure;
2432	if (set->size &&
2433	    nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
2434		goto nla_put_failure;
2435	nla_nest_end(skb, desc);
2436
2437	nlmsg_end(skb, nlh);
2438	return 0;
2439
2440nla_put_failure:
2441	nlmsg_trim(skb, nlh);
2442	return -1;
2443}
2444
2445static int nf_tables_set_notify(const struct nft_ctx *ctx,
2446				const struct nft_set *set,
2447				int event, gfp_t gfp_flags)
2448{
2449	struct sk_buff *skb;
2450	u32 portid = ctx->portid;
2451	int err;
2452
2453	if (!ctx->report &&
2454	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
2455		return 0;
2456
2457	err = -ENOBUFS;
2458	skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
2459	if (skb == NULL)
2460		goto err;
2461
2462	err = nf_tables_fill_set(skb, ctx, set, event, 0);
2463	if (err < 0) {
2464		kfree_skb(skb);
2465		goto err;
2466	}
2467
2468	err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
2469			     ctx->report, gfp_flags);
2470err:
2471	if (err < 0)
2472		nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
2473	return err;
2474}
2475
/*
 * Netlink dump callback emitting one NFT_MSG_NEWSET message per set.
 *
 * cb->data holds a struct nft_ctx whose afi/table members, when non-NULL,
 * restrict the dump to that family/table. Resume state is kept in cb->args:
 *   args[0] - index of the set to resume at within the saved table
 *   args[1] - non-zero once the whole dump has completed
 *   args[2] - table to resume at (stored as an unsigned long)
 *   args[3] - address family to resume at
 *
 * Always returns skb->len.
 */
static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nft_set *set;
	unsigned int idx, s_idx = cb->args[0];
	struct nft_af_info *afi;
	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
	struct net *net = sock_net(skb->sk);
	int cur_family = cb->args[3];
	struct nft_ctx *ctx = cb->data, ctx_set;

	/* A previous invocation already finished the dump. */
	if (cb->args[1])
		return skb->len;

	rcu_read_lock();
	cb->seq = net->nft.base_seq;

	list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
		/* Honour the family filter from the request, if any. */
		if (ctx->afi && ctx->afi != afi)
			continue;

		/* When resuming, skip families until the saved one. */
		if (cur_family) {
			if (afi->family != cur_family)
				continue;

			cur_family = 0;
		}
		list_for_each_entry_rcu(table, &afi->tables, list) {
			/* Honour the table filter from the request, if any. */
			if (ctx->table && ctx->table != table)
				continue;

			/* When resuming, skip tables until the saved one. */
			if (cur_table) {
				if (cur_table != table)
					continue;

				cur_table = NULL;
			}
			idx = 0;
			list_for_each_entry_rcu(set, &table->sets, list) {
				/* Skip sets emitted by earlier invocations. */
				if (idx < s_idx)
					goto cont;

				/* Fill from a copy of the context pointing at
				 * the table/family currently being walked.
				 */
				ctx_set = *ctx;
				ctx_set.table = table;
				ctx_set.afi = afi;
				if (nf_tables_fill_set(skb, &ctx_set, set,
						       NFT_MSG_NEWSET,
						       NLM_F_MULTI) < 0) {
					/* Fill failed: remember the position
					 * and stop for this invocation.
					 */
					cb->args[0] = idx;
					cb->args[2] = (unsigned long) table;
					cb->args[3] = afi->family;
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
				idx++;
			}
			/* The saved index applies only to the resumed table. */
			if (s_idx)
				s_idx = 0;
		}
	}
	/* Everything was walked: mark the dump as complete. */
	cb->args[1] = 1;
done:
	rcu_read_unlock();
	return skb->len;
}
2541
/*
 * Dump completion callback: release the context copy that
 * nf_tables_getset() allocated and stored in cb->data.
 */
static int nf_tables_dump_sets_done(struct netlink_callback *cb)
{
	kfree(cb->data);
	return 0;
}
2547
2548static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
2549			    const struct nlmsghdr *nlh,
2550			    const struct nlattr * const nla[])
2551{
2552	const struct nft_set *set;
2553	struct nft_ctx ctx;
2554	struct sk_buff *skb2;
2555	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2556	int err;
2557
2558	/* Verify existence before starting dump */
2559	err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
2560	if (err < 0)
2561		return err;
2562
2563	if (nlh->nlmsg_flags & NLM_F_DUMP) {
2564		struct netlink_dump_control c = {
2565			.dump = nf_tables_dump_sets,
2566			.done = nf_tables_dump_sets_done,
2567		};
2568		struct nft_ctx *ctx_dump;
2569
2570		ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL);
2571		if (ctx_dump == NULL)
2572			return -ENOMEM;
2573
2574		*ctx_dump = ctx;
2575		c.data = ctx_dump;
2576
2577		return netlink_dump_start(nlsk, skb, nlh, &c);
2578	}
2579
2580	/* Only accept unspec with dump */
2581	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
2582		return -EAFNOSUPPORT;
2583
2584	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
2585	if (IS_ERR(set))
2586		return PTR_ERR(set);
2587	if (set->flags & NFT_SET_INACTIVE)
2588		return -ENOENT;
2589
2590	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2591	if (skb2 == NULL)
2592		return -ENOMEM;
2593
2594	err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
2595	if (err < 0)
2596		goto err;
2597
2598	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
2599
2600err:
2601	kfree_skb(skb2);
2602	return err;
2603}
2604
2605static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
2606				    struct nft_set_desc *desc,
2607				    const struct nlattr *nla)
2608{
2609	struct nlattr *da[NFTA_SET_DESC_MAX + 1];
2610	int err;
2611
2612	err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
2613	if (err < 0)
2614		return err;
2615
2616	if (da[NFTA_SET_DESC_SIZE] != NULL)
2617		desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
2618
2619	return 0;
2620}
2621
2622static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2623			    const struct nlmsghdr *nlh,
2624			    const struct nlattr * const nla[])
2625{
2626	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2627	const struct nft_set_ops *ops;
2628	struct nft_af_info *afi;
2629	struct net *net = sock_net(skb->sk);
2630	struct nft_table *table;
2631	struct nft_set *set;
2632	struct nft_ctx ctx;
2633	char name[IFNAMSIZ];
2634	unsigned int size;
2635	bool create;
2636	u64 timeout;
2637	u32 ktype, dtype, flags, policy, gc_int;
2638	struct nft_set_desc desc;
2639	int err;
2640
2641	if (nla[NFTA_SET_TABLE] == NULL ||
2642	    nla[NFTA_SET_NAME] == NULL ||
2643	    nla[NFTA_SET_KEY_LEN] == NULL ||
2644	    nla[NFTA_SET_ID] == NULL)
2645		return -EINVAL;
2646
2647	memset(&desc, 0, sizeof(desc));
2648
2649	ktype = NFT_DATA_VALUE;
2650	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
2651		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
2652		if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
2653			return -EINVAL;
2654	}
2655
2656	desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
2657	if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
2658		return -EINVAL;
2659
2660	flags = 0;
2661	if (nla[NFTA_SET_FLAGS] != NULL) {
2662		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
2663		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
2664			      NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
2665			      NFT_SET_MAP | NFT_SET_EVAL))
2666			return -EINVAL;
2667		/* Only one of both operations is supported */
2668		if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) ==
2669			     (NFT_SET_MAP | NFT_SET_EVAL))
2670			return -EOPNOTSUPP;
2671	}
2672
2673	dtype = 0;
2674	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
2675		if (!(flags & NFT_SET_MAP))
2676			return -EINVAL;
2677
2678		dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
2679		if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
2680		    dtype != NFT_DATA_VERDICT)
2681			return -EINVAL;
2682
2683		if (dtype != NFT_DATA_VERDICT) {
2684			if (nla[NFTA_SET_DATA_LEN] == NULL)
2685				return -EINVAL;
2686			desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
2687			if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
2688				return -EINVAL;
2689		} else
2690			desc.dlen = sizeof(struct nft_verdict);
2691	} else if (flags & NFT_SET_MAP)
2692		return -EINVAL;
2693
2694	timeout = 0;
2695	if (nla[NFTA_SET_TIMEOUT] != NULL) {
2696		if (!(flags & NFT_SET_TIMEOUT))
2697			return -EINVAL;
2698		timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
2699	}
2700	gc_int = 0;
2701	if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
2702		if (!(flags & NFT_SET_TIMEOUT))
2703			return -EINVAL;
2704		gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
2705	}
2706
2707	policy = NFT_SET_POL_PERFORMANCE;
2708	if (nla[NFTA_SET_POLICY] != NULL)
2709		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
2710
2711	if (nla[NFTA_SET_DESC] != NULL) {
2712		err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
2713		if (err < 0)
2714			return err;
2715	}
2716
2717	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
2718
2719	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
2720	if (IS_ERR(afi))
2721		return PTR_ERR(afi);
2722
2723	table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
2724	if (IS_ERR(table))
2725		return PTR_ERR(table);
2726
2727	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
2728
2729	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
2730	if (IS_ERR(set)) {
2731		if (PTR_ERR(set) != -ENOENT)
2732			return PTR_ERR(set);
2733		set = NULL;
2734	}
2735
2736	if (set != NULL) {
2737		if (nlh->nlmsg_flags & NLM_F_EXCL)
2738			return -EEXIST;
2739		if (nlh->nlmsg_flags & NLM_F_REPLACE)
2740			return -EOPNOTSUPP;
2741		return 0;
2742	}
2743
2744	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
2745		return -ENOENT;
2746
2747	ops = nft_select_set_ops(nla, &desc, policy);
2748	if (IS_ERR(ops))
2749		return PTR_ERR(ops);
2750
2751	size = 0;
2752	if (ops->privsize != NULL)
2753		size = ops->privsize(nla);
2754
2755	err = -ENOMEM;
2756	set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
2757	if (set == NULL)
2758		goto err1;
2759
2760	nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
2761	err = nf_tables_set_alloc_name(&ctx, set, name);
2762	if (err < 0)
2763		goto err2;
2764
2765	INIT_LIST_HEAD(&set->bindings);
2766	write_pnet(&set->pnet, net);
2767	set->ops   = ops;
2768	set->ktype = ktype;
2769	set->klen  = desc.klen;
2770	set->dtype = dtype;
2771	set->dlen  = desc.dlen;
2772	set->flags = flags;
2773	set->size  = desc.size;
2774	set->policy = policy;
2775	set->timeout = timeout;
2776	set->gc_int = gc_int;
2777
2778	err = ops->init(set, &desc, nla);
2779	if (err < 0)
2780		goto err2;
2781
2782	err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
2783	if (err < 0)
2784		goto err2;
2785
2786	list_add_tail_rcu(&set->list, &table->sets);
2787	table->use++;
2788	return 0;
2789
2790err2:
2791	kfree(set);
2792err1:
2793	module_put(ops->owner);
2794	return err;
2795}
2796
/*
 * Tear down @set: run the backend's destroy hook, drop the reference on
 * the backend's owning module, then free the set memory.
 */
static void nft_set_destroy(struct nft_set *set)
{
	set->ops->destroy(set);
	module_put(set->ops->owner);
	kfree(set);
}
2803
/*
 * Unlink @set from its list, emit an NFT_MSG_DELSET notification
 * (allocated with GFP_ATOMIC) and destroy the set.
 */
static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
	list_del_rcu(&set->list);
	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC);
	nft_set_destroy(set);
}
2810
2811static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
2812			    const struct nlmsghdr *nlh,
2813			    const struct nlattr * const nla[])
2814{
2815	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2816	struct nft_set *set;
2817	struct nft_ctx ctx;
2818	int err;
2819
2820	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
2821		return -EAFNOSUPPORT;
2822	if (nla[NFTA_SET_TABLE] == NULL)
2823		return -EINVAL;
2824
2825	err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
2826	if (err < 0)
2827		return err;
2828
2829	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
2830	if (IS_ERR(set))
2831		return PTR_ERR(set);
2832	if (set->flags & NFT_SET_INACTIVE)
2833		return -ENOENT;
2834	if (!list_empty(&set->bindings))
2835		return -EBUSY;
2836
2837	return nft_delset(&ctx, set);
2838}
2839
2840static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
2841					const struct nft_set *set,
2842					const struct nft_set_iter *iter,
2843					const struct nft_set_elem *elem)
2844{
2845	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
2846	enum nft_registers dreg;
2847
2848	dreg = nft_type_to_reg(set->dtype);
2849	return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext),
2850					   set->dtype == NFT_DATA_VERDICT ?
2851					   NFT_DATA_VERDICT : NFT_DATA_VALUE,
2852					   set->dlen);
2853}
2854
2855int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
2856		       struct nft_set_binding *binding)
2857{
2858	struct nft_set_binding *i;
2859	struct nft_set_iter iter;
2860
2861	if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
2862		return -EBUSY;
2863
2864	if (binding->flags & NFT_SET_MAP) {
2865		/* If the set is already bound to the same chain all
2866		 * jumps are already validated for that chain.
2867		 */
2868		list_for_each_entry(i, &set->bindings, list) {
2869			if (binding->flags & NFT_SET_MAP &&
2870			    i->chain == binding->chain)
2871				goto bind;
2872		}
2873
2874		iter.skip 	= 0;
2875		iter.count	= 0;
2876		iter.err	= 0;
2877		iter.fn		= nf_tables_bind_check_setelem;
2878
2879		set->ops->walk(ctx, set, &iter);
2880		if (iter.err < 0) {
2881			/* Destroy anonymous sets if binding fails */
2882			if (set->flags & NFT_SET_ANONYMOUS)
2883				nf_tables_set_destroy(ctx, set);
2884
2885			return iter.err;
2886		}
2887	}
2888bind:
2889	binding->chain = ctx->chain;
2890	list_add_tail_rcu(&binding->list, &set->bindings);
2891	return 0;
2892}
2893
2894void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
2895			  struct nft_set_binding *binding)
2896{
2897	list_del_rcu(&binding->list);
2898
2899	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
2900	    !(set->flags & NFT_SET_INACTIVE))
2901		nf_tables_set_destroy(ctx, set);
2902}
2903
/*
 * Per-extension layout description for set elements, indexed by extension
 * id. Entries give the alignment and, where specified, the length of the
 * extension; KEY, DATA and EXPR specify no length here.
 */
const struct nft_set_ext_type nft_set_ext_types[] = {
	[NFT_SET_EXT_KEY]		= {
		.align	= __alignof__(u32),
	},
	[NFT_SET_EXT_DATA]		= {
		.align	= __alignof__(u32),
	},
	[NFT_SET_EXT_EXPR]		= {
		.align	= __alignof__(struct nft_expr),
	},
	[NFT_SET_EXT_FLAGS]		= {
		.len	= sizeof(u8),
		.align	= __alignof__(u8),
	},
	[NFT_SET_EXT_TIMEOUT]		= {
		.len	= sizeof(u64),
		.align	= __alignof__(u64),
	},
	[NFT_SET_EXT_EXPIRATION]	= {
		.len	= sizeof(unsigned long),
		.align	= __alignof__(unsigned long),
	},
	[NFT_SET_EXT_USERDATA]		= {
		.len	= sizeof(struct nft_userdata),
		.align	= __alignof__(struct nft_userdata),
	},
};
EXPORT_SYMBOL_GPL(nft_set_ext_types);
2932
2933/*
2934 * Set elements
2935 */
2936
/*
 * Netlink attribute policy for a single set element; userdata is limited
 * to NFT_USERDATA_MAXLEN bytes.
 */
static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
	[NFTA_SET_ELEM_KEY]		= { .type = NLA_NESTED },
	[NFTA_SET_ELEM_DATA]		= { .type = NLA_NESTED },
	[NFTA_SET_ELEM_FLAGS]		= { .type = NLA_U32 },
	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
					    .len = NFT_USERDATA_MAXLEN },
};
2945
/*
 * Netlink attribute policy for set element list messages: table name, set
 * name, the nested list of elements, and the set id.
 */
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
	[NFTA_SET_ELEM_LIST_TABLE]	= { .type = NLA_STRING },
	[NFTA_SET_ELEM_LIST_SET]	= { .type = NLA_STRING },
	[NFTA_SET_ELEM_LIST_ELEMENTS]	= { .type = NLA_NESTED },
	[NFTA_SET_ELEM_LIST_SET_ID]	= { .type = NLA_U32 },
};
2952
2953static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
2954				      const struct sk_buff *skb,
2955				      const struct nlmsghdr *nlh,
2956				      const struct nlattr * const nla[],
2957				      bool trans)
2958{
2959	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2960	struct nft_af_info *afi;
2961	struct nft_table *table;
2962	struct net *net = sock_net(skb->sk);
2963
2964	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
2965	if (IS_ERR(afi))
2966		return PTR_ERR(afi);
2967
2968	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
2969	if (IS_ERR(table))
2970		return PTR_ERR(table);
2971	if (!trans && (table->flags & NFT_TABLE_INACTIVE))
2972		return -ENOENT;
2973
2974	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
2975	return 0;
2976}
2977
2978static int nf_tables_fill_setelem(struct sk_buff *skb,
2979				  const struct nft_set *set,
2980				  const struct nft_set_elem *elem)
2981{
2982	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
2983	unsigned char *b = skb_tail_pointer(skb);
2984	struct nlattr *nest;
2985
2986	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
2987	if (nest == NULL)
2988		goto nla_put_failure;
2989
2990	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
2991			  NFT_DATA_VALUE, set->klen) < 0)
2992		goto nla_put_failure;
2993
2994	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
2995	    nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
2996			  set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
2997			  set->dlen) < 0)
2998		goto nla_put_failure;
2999
3000	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) &&
3001	    nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
3002		goto nla_put_failure;
3003
3004	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
3005	    nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
3006		         htonl(*nft_set_ext_flags(ext))))
3007		goto nla_put_failure;
3008
3009	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
3010	    nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
3011			 cpu_to_be64(*nft_set_ext_timeout(ext))))
3012		goto nla_put_failure;
3013
3014	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
3015		unsigned long expires, now = jiffies;
3016
3017		expires = *nft_set_ext_expiration(ext);
3018		if (time_before(now, expires))
3019			expires -= now;
3020		else
3021			expires = 0;
3022
3023		if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
3024				 cpu_to_be64(jiffies_to_msecs(expires))))
3025			goto nla_put_failure;
3026	}
3027
3028	if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
3029		struct nft_userdata *udata;
3030
3031		udata = nft_set_ext_userdata(ext);
3032		if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
3033			    udata->len + 1, udata->data))
3034			goto nla_put_failure;
3035	}
3036
3037	nla_nest_end(skb, nest);
3038	return 0;
3039
3040nla_put_failure:
3041	nlmsg_trim(skb, b);
3042	return -EMSGSIZE;
3043}
3044
/*
 * Arguments for dumping set elements. The iterator is embedded so that the
 * walk callback can get back to this structure with container_of().
 */
struct nft_set_dump_args {
	const struct netlink_callback	*cb;	/* dump callback state */
	struct nft_set_iter		iter;	/* iterator handed to ops->walk() */
	struct sk_buff			*skb;	/* message being filled */
};
3050
3051static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
3052				  const struct nft_set *set,
3053				  const struct nft_set_iter *iter,
3054				  const struct nft_set_elem *elem)
3055{
3056	struct nft_set_dump_args *args;
3057
3058	args = container_of(iter, struct nft_set_dump_args, iter);
3059	return nf_tables_fill_setelem(args->skb, set, elem);
3060}
3061
3062static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
3063{
3064	const struct nft_set *set;
3065	struct nft_set_dump_args args;
3066	struct nft_ctx ctx;
3067	struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
3068	struct nfgenmsg *nfmsg;
3069	struct nlmsghdr *nlh;
3070	struct nlattr *nest;
3071	u32 portid, seq;
3072	int event, err;
3073
3074	err = nlmsg_parse(cb->nlh, sizeof(struct nfgenmsg), nla,
3075			  NFTA_SET_ELEM_LIST_MAX, nft_set_elem_list_policy);
3076	if (err < 0)
3077		return err;
3078
3079	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
3080					 false);
3081	if (err < 0)
3082		return err;
3083
3084	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
3085	if (IS_ERR(set))
3086		return PTR_ERR(set);
3087	if (set->flags & NFT_SET_INACTIVE)
3088		return -ENOENT;
3089
3090	event  = NFT_MSG_NEWSETELEM;
3091	event |= NFNL_SUBSYS_NFTABLES << 8;
3092	portid = NETLINK_CB(cb->skb).portid;
3093	seq    = cb->nlh->nlmsg_seq;
3094
3095	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
3096			NLM_F_MULTI);
3097	if (nlh == NULL)
3098		goto nla_put_failure;
3099
3100	nfmsg = nlmsg_data(nlh);
3101	nfmsg->nfgen_family = ctx.afi->family;
3102	nfmsg->version      = NFNETLINK_V0;
3103	nfmsg->res_id	    = htons(ctx.net->nft.base_seq & 0xffff);
3104
3105	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
3106		goto nla_put_failure;
3107	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
3108		goto nla_put_failure;
3109
3110	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
3111	if (nest == NULL)
3112		goto nla_put_failure;
3113
3114	args.cb		= cb;
3115	args.skb	= skb;
3116	args.iter.skip	= cb->args[0];
3117	args.iter.count	= 0;
3118	args.iter.err   = 0;
3119	args.iter.fn	= nf_tables_dump_setelem;
3120	set->ops->walk(&ctx, set, &args.iter);
3121
3122	nla_nest_end(skb, nest);
3123	nlmsg_end(skb, nlh);
3124
3125	if (args.iter.err && args.iter.err != -EMSGSIZE)
3126		return args.iter.err;
3127	if (args.iter.count == cb->args[0])
3128		return 0;
3129
3130	cb->args[0] = args.iter.count;
3131	return skb->len;
3132
3133nla_put_failure:
3134	return -ENOSPC;
3135}
3136
3137static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
3138				const struct nlmsghdr *nlh,
3139				const struct nlattr * const nla[])
3140{
3141	const struct nft_set *set;
3142	struct nft_ctx ctx;
3143	int err;
3144
3145	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
3146	if (err < 0)
3147		return err;
3148
3149	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
3150	if (IS_ERR(set))
3151		return PTR_ERR(set);
3152	if (set->flags & NFT_SET_INACTIVE)
3153		return -ENOENT;
3154
3155	if (nlh->nlmsg_flags & NLM_F_DUMP) {
3156		struct netlink_dump_control c = {
3157			.dump = nf_tables_dump_set,
3158		};
3159		return netlink_dump_start(nlsk, skb, nlh, &c);
3160	}
3161	return -EOPNOTSUPP;
3162}
3163
3164static int nf_tables_fill_setelem_info(struct sk_buff *skb,
3165				       const struct nft_ctx *ctx, u32 seq,
3166				       u32 portid, int event, u16 flags,
3167				       const struct nft_set *set,
3168				       const struct nft_set_elem *elem)
3169{
3170	struct nfgenmsg *nfmsg;
3171	struct nlmsghdr *nlh;
3172	struct nlattr *nest;
3173	int err;
3174
3175	event |= NFNL_SUBSYS_NFTABLES << 8;
3176	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
3177			flags);
3178	if (nlh == NULL)
3179		goto nla_put_failure;
3180
3181	nfmsg = nlmsg_data(nlh);
3182	nfmsg->nfgen_family	= ctx->afi->family;
3183	nfmsg->version		= NFNETLINK_V0;
3184	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
3185
3186	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
3187		goto nla_put_failure;
3188	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
3189		goto nla_put_failure;
3190
3191	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
3192	if (nest == NULL)
3193		goto nla_put_failure;
3194
3195	err = nf_tables_fill_setelem(skb, set, elem);
3196	if (err < 0)
3197		goto nla_put_failure;
3198
3199	nla_nest_end(skb, nest);
3200
3201	nlmsg_end(skb, nlh);
3202	return 0;
3203
3204nla_put_failure:
3205	nlmsg_trim(skb, nlh);
3206	return -1;
3207}
3208
3209static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
3210				    const struct nft_set *set,
3211				    const struct nft_set_elem *elem,
3212				    int event, u16 flags)
3213{
3214	struct net *net = ctx->net;
3215	u32 portid = ctx->portid;
3216	struct sk_buff *skb;
3217	int err;
3218
3219	if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
3220		return 0;
3221
3222	err = -ENOBUFS;
3223	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3224	if (skb == NULL)
3225		goto err;
3226
3227	err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
3228					  set, elem);
3229	if (err < 0) {
3230		kfree_skb(skb);
3231		goto err;
3232	}
3233
3234	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
3235			     GFP_KERNEL);
3236err:
3237	if (err < 0)
3238		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
3239	return err;
3240}
3241
3242static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
3243					      int msg_type,
3244					      struct nft_set *set)
3245{
3246	struct nft_trans *trans;
3247
3248	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
3249	if (trans == NULL)
3250		return NULL;
3251
3252	nft_trans_elem_set(trans) = set;
3253	return trans;
3254}
3255
3256void *nft_set_elem_init(const struct nft_set *set,
3257			const struct nft_set_ext_tmpl *tmpl,
3258			const u32 *key, const u32 *data,
3259			u64 timeout, gfp_t gfp)
3260{
3261	struct nft_set_ext *ext;
3262	void *elem;
3263
3264	elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
3265	if (elem == NULL)
3266		return NULL;
3267
3268	ext = nft_set_elem_ext(set, elem);
3269	nft_set_ext_init(ext, tmpl);
3270
3271	memcpy(nft_set_ext_key(ext), key, set->klen);
3272	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
3273		memcpy(nft_set_ext_data(ext), data, set->dlen);
3274	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
3275		*nft_set_ext_expiration(ext) =
3276			jiffies + msecs_to_jiffies(timeout);
3277	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
3278		*nft_set_ext_timeout(ext) = timeout;
3279
3280	return elem;
3281}
3282
3283void nft_set_elem_destroy(const struct nft_set *set, void *elem)
3284{
3285	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
3286
3287	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
3288	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
3289		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
3290	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
3291		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
3292
3293	kfree(elem);
3294}
3295EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
3296
/*
 * Parse one element description @attr and add the element to @set.
 *
 * The nested attributes are validated against the set's flags, key and
 * (for maps) data are initialised from the message, an extension template
 * is built, the element is allocated and inserted through the set backend,
 * and a NFT_MSG_NEWSETELEM transaction is appended to the commit list.
 *
 * Returns 0 on success or a negative errno; on failure everything that was
 * set up so far is released in reverse order via the err5..err1 labels.
 */
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
			    const struct nlattr *attr)
{
	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
	struct nft_data_desc d1, d2;
	struct nft_set_ext_tmpl tmpl;
	struct nft_set_ext *ext;
	struct nft_set_elem elem;
	struct nft_set_binding *binding;
	struct nft_userdata *udata;
	struct nft_data data;
	enum nft_registers dreg;
	struct nft_trans *trans;
	u64 timeout;
	u32 flags;
	u8 ulen;
	int err;

	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
			       nft_set_elem_policy);
	if (err < 0)
		return err;

	/* Every element needs a key. */
	if (nla[NFTA_SET_ELEM_KEY] == NULL)
		return -EINVAL;

	nft_set_ext_prepare(&tmpl);

	/* Only the interval-end flag is accepted, and only on interval
	 * sets. The flags extension is added only for non-zero flags.
	 */
	flags = 0;
	if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
		flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
		if (flags & ~NFT_SET_ELEM_INTERVAL_END)
			return -EINVAL;
		if (!(set->flags & NFT_SET_INTERVAL) &&
		    flags & NFT_SET_ELEM_INTERVAL_END)
			return -EINVAL;
		if (flags != 0)
			nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
	}

	/* Map elements carry data unless they are an interval end, in
	 * which case data is forbidden; non-map elements never carry data.
	 */
	if (set->flags & NFT_SET_MAP) {
		if (nla[NFTA_SET_ELEM_DATA] == NULL &&
		    !(flags & NFT_SET_ELEM_INTERVAL_END))
			return -EINVAL;
		if (nla[NFTA_SET_ELEM_DATA] != NULL &&
		    flags & NFT_SET_ELEM_INTERVAL_END)
			return -EINVAL;
	} else {
		if (nla[NFTA_SET_ELEM_DATA] != NULL)
			return -EINVAL;
	}

	/* An explicit timeout requires a timeout set; otherwise a timeout
	 * set's own timeout is used.
	 */
	timeout = 0;
	if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
		if (!(set->flags & NFT_SET_TIMEOUT))
			return -EINVAL;
		timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
	} else if (set->flags & NFT_SET_TIMEOUT) {
		timeout = set->timeout;
	}

	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
			    nla[NFTA_SET_ELEM_KEY]);
	if (err < 0)
		goto err1;
	/* The key must be a plain value of exactly the set's key length. */
	err = -EINVAL;
	if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
		goto err2;

	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len);
	if (timeout > 0) {
		nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
		/* A per-element timeout is stored only when it differs
		 * from the set's timeout.
		 */
		if (timeout != set->timeout)
			nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
	}

	if (nla[NFTA_SET_ELEM_DATA] != NULL) {
		err = nft_data_init(ctx, &data, sizeof(data), &d2,
				    nla[NFTA_SET_ELEM_DATA]);
		if (err < 0)
			goto err2;

		/* Non-verdict data must match the set's data length. */
		err = -EINVAL;
		if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen)
			goto err3;

		/* Validate the new data against every chain the set is
		 * bound to as a map.
		 */
		dreg = nft_type_to_reg(set->dtype);
		list_for_each_entry(binding, &set->bindings, list) {
			struct nft_ctx bind_ctx = {
				.afi	= ctx->afi,
				.table	= ctx->table,
				.chain	= (struct nft_chain *)binding->chain,
			};

			if (!(binding->flags & NFT_SET_MAP))
				continue;

			err = nft_validate_register_store(&bind_ctx, dreg,
							  &data,
							  d2.type, d2.len);
			if (err < 0)
				goto err3;
		}

		nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
	}

	/* The full maximum length of userdata can exceed the maximum
	 * offset value (U8_MAX) for following extensions, therefore it
	 * must be the last extension added.
	 */
	ulen = 0;
	if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
		ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
		if (ulen > 0)
			nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
					       ulen);
	}

	err = -ENOMEM;
	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
				      timeout, GFP_KERNEL);
	if (elem.priv == NULL)
		goto err3;

	ext = nft_set_elem_ext(set, elem.priv);
	if (flags)
		*nft_set_ext_flags(ext) = flags;
	if (ulen > 0) {
		/* The stored length is the payload length minus one. */
		udata = nft_set_ext_userdata(ext);
		udata->len = ulen - 1;
		nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
	}

	/* err still holds -ENOMEM for the allocation below. */
	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
	if (trans == NULL)
		goto err4;

	/* Tag the element with the current generation mask and the busy
	 * bit before handing it to the backend.
	 */
	ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
	err = set->ops->insert(set, &elem);
	if (err < 0)
		goto err5;

	nft_trans_elem(trans) = elem;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
	return 0;

err5:
	kfree(trans);
err4:
	kfree(elem.priv);
err3:
	if (nla[NFTA_SET_ELEM_DATA] != NULL)
		nft_data_uninit(&data, d2.type);
err2:
	nft_data_uninit(&elem.key.val, d1.type);
err1:
	return err;
}
3456
3457static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
3458				const struct nlmsghdr *nlh,
3459				const struct nlattr * const nla[])
3460{
3461	struct net *net = sock_net(skb->sk);
3462	const struct nlattr *attr;
3463	struct nft_set *set;
3464	struct nft_ctx ctx;
3465	int rem, err = 0;
3466
3467	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
3468		return -EINVAL;
3469
3470	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
3471	if (err < 0)
3472		return err;
3473
3474	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
3475	if (IS_ERR(set)) {
3476		if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
3477			set = nf_tables_set_lookup_byid(net,
3478					nla[NFTA_SET_ELEM_LIST_SET_ID]);
3479		}
3480		if (IS_ERR(set))
3481			return PTR_ERR(set);
3482	}
3483
3484	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
3485		return -EBUSY;
3486
3487	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
3488		if (set->size &&
3489		    !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
3490			return -ENFILE;
3491
3492		err = nft_add_set_elem(&ctx, set, attr);
3493		if (err < 0) {
3494			atomic_dec(&set->nelems);
3495			break;
3496		}
3497	}
3498	return err;
3499}
3500
3501static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
3502			   const struct nlattr *attr)
3503{
3504	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
3505	struct nft_data_desc desc;
3506	struct nft_set_elem elem;
3507	struct nft_trans *trans;
3508	int err;
3509
3510	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
3511			       nft_set_elem_policy);
3512	if (err < 0)
3513		goto err1;
3514
3515	err = -EINVAL;
3516	if (nla[NFTA_SET_ELEM_KEY] == NULL)
3517		goto err1;
3518
3519	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
3520			    nla[NFTA_SET_ELEM_KEY]);
3521	if (err < 0)
3522		goto err1;
3523
3524	err = -EINVAL;
3525	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
3526		goto err2;
3527
3528	trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
3529	if (trans == NULL) {
3530		err = -ENOMEM;
3531		goto err2;
3532	}
3533
3534	elem.priv = set->ops->deactivate(set, &elem);
3535	if (elem.priv == NULL) {
3536		err = -ENOENT;
3537		goto err3;
3538	}
3539
3540	nft_trans_elem(trans) = elem;
3541	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
3542	return 0;
3543
3544err3:
3545	kfree(trans);
3546err2:
3547	nft_data_uninit(&elem.key.val, desc.type);
3548err1:
3549	return err;
3550}
3551
3552static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
3553				const struct nlmsghdr *nlh,
3554				const struct nlattr * const nla[])
3555{
3556	const struct nlattr *attr;
3557	struct nft_set *set;
3558	struct nft_ctx ctx;
3559	int rem, err = 0;
3560
3561	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
3562		return -EINVAL;
3563
3564	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
3565	if (err < 0)
3566		return err;
3567
3568	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
3569	if (IS_ERR(set))
3570		return PTR_ERR(set);
3571	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
3572		return -EBUSY;
3573
3574	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
3575		err = nft_del_setelem(&ctx, set, attr);
3576		if (err < 0)
3577			break;
3578
3579		set->ndeact++;
3580	}
3581	return err;
3582}
3583
3584void nft_set_gc_batch_release(struct rcu_head *rcu)
3585{
3586	struct nft_set_gc_batch *gcb;
3587	unsigned int i;
3588
3589	gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
3590	for (i = 0; i < gcb->head.cnt; i++)
3591		nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
3592	kfree(gcb);
3593}
3594EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
3595
3596struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
3597						gfp_t gfp)
3598{
3599	struct nft_set_gc_batch *gcb;
3600
3601	gcb = kzalloc(sizeof(*gcb), gfp);
3602	if (gcb == NULL)
3603		return gcb;
3604	gcb->head.set = set;
3605	return gcb;
3606}
3607EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
3608
3609static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
3610				   u32 portid, u32 seq)
3611{
3612	struct nlmsghdr *nlh;
3613	struct nfgenmsg *nfmsg;
3614	int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
3615
3616	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
3617	if (nlh == NULL)
3618		goto nla_put_failure;
3619
3620	nfmsg = nlmsg_data(nlh);
3621	nfmsg->nfgen_family	= AF_UNSPEC;
3622	nfmsg->version		= NFNETLINK_V0;
3623	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
3624
3625	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
3626		goto nla_put_failure;
3627
3628	nlmsg_end(skb, nlh);
3629	return 0;
3630
3631nla_put_failure:
3632	nlmsg_trim(skb, nlh);
3633	return -EMSGSIZE;
3634}
3635
3636static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
3637{
3638	struct nlmsghdr *nlh = nlmsg_hdr(skb);
3639	struct sk_buff *skb2;
3640	int err;
3641
3642	if (nlmsg_report(nlh) &&
3643	    !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
3644		return 0;
3645
3646	err = -ENOBUFS;
3647	skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3648	if (skb2 == NULL)
3649		goto err;
3650
3651	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
3652				      nlh->nlmsg_seq);
3653	if (err < 0) {
3654		kfree_skb(skb2);
3655		goto err;
3656	}
3657
3658	err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
3659			     NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
3660err:
3661	if (err < 0) {
3662		nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
3663				  err);
3664	}
3665	return err;
3666}
3667
3668static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
3669			    const struct nlmsghdr *nlh,
3670			    const struct nlattr * const nla[])
3671{
3672	struct net *net = sock_net(skb->sk);
3673	struct sk_buff *skb2;
3674	int err;
3675
3676	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3677	if (skb2 == NULL)
3678		return -ENOMEM;
3679
3680	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
3681				      nlh->nlmsg_seq);
3682	if (err < 0)
3683		goto err;
3684
3685	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
3686err:
3687	kfree_skb(skb2);
3688	return err;
3689}
3690
/*
 * nfnetlink dispatch table, indexed by NFT_MSG_* message type. Each entry
 * names the handler together with the highest attribute number and the
 * attribute policy for that message.
 *
 * Handlers for NEW and DEL messages are installed as .call_batch, handlers
 * for GET messages as .call. NFT_MSG_GETGEN takes no attributes, so it has
 * neither attr_count nor policy.
 */
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
	[NFT_MSG_NEWTABLE] = {
		.call_batch	= nf_tables_newtable,
		.attr_count	= NFTA_TABLE_MAX,
		.policy		= nft_table_policy,
	},
	[NFT_MSG_GETTABLE] = {
		.call		= nf_tables_gettable,
		.attr_count	= NFTA_TABLE_MAX,
		.policy		= nft_table_policy,
	},
	[NFT_MSG_DELTABLE] = {
		.call_batch	= nf_tables_deltable,
		.attr_count	= NFTA_TABLE_MAX,
		.policy		= nft_table_policy,
	},
	[NFT_MSG_NEWCHAIN] = {
		.call_batch	= nf_tables_newchain,
		.attr_count	= NFTA_CHAIN_MAX,
		.policy		= nft_chain_policy,
	},
	[NFT_MSG_GETCHAIN] = {
		.call		= nf_tables_getchain,
		.attr_count	= NFTA_CHAIN_MAX,
		.policy		= nft_chain_policy,
	},
	[NFT_MSG_DELCHAIN] = {
		.call_batch	= nf_tables_delchain,
		.attr_count	= NFTA_CHAIN_MAX,
		.policy		= nft_chain_policy,
	},
	[NFT_MSG_NEWRULE] = {
		.call_batch	= nf_tables_newrule,
		.attr_count	= NFTA_RULE_MAX,
		.policy		= nft_rule_policy,
	},
	[NFT_MSG_GETRULE] = {
		.call		= nf_tables_getrule,
		.attr_count	= NFTA_RULE_MAX,
		.policy		= nft_rule_policy,
	},
	[NFT_MSG_DELRULE] = {
		.call_batch	= nf_tables_delrule,
		.attr_count	= NFTA_RULE_MAX,
		.policy		= nft_rule_policy,
	},
	[NFT_MSG_NEWSET] = {
		.call_batch	= nf_tables_newset,
		.attr_count	= NFTA_SET_MAX,
		.policy		= nft_set_policy,
	},
	[NFT_MSG_GETSET] = {
		.call		= nf_tables_getset,
		.attr_count	= NFTA_SET_MAX,
		.policy		= nft_set_policy,
	},
	[NFT_MSG_DELSET] = {
		.call_batch	= nf_tables_delset,
		.attr_count	= NFTA_SET_MAX,
		.policy		= nft_set_policy,
	},
	[NFT_MSG_NEWSETELEM] = {
		.call_batch	= nf_tables_newsetelem,
		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
		.policy		= nft_set_elem_list_policy,
	},
	[NFT_MSG_GETSETELEM] = {
		.call		= nf_tables_getsetelem,
		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
		.policy		= nft_set_elem_list_policy,
	},
	[NFT_MSG_DELSETELEM] = {
		.call_batch	= nf_tables_delsetelem,
		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
		.policy		= nft_set_elem_list_policy,
	},
	[NFT_MSG_GETGEN] = {
		.call		= nf_tables_getgen,
	},
};
3771
3772static void nft_chain_commit_update(struct nft_trans *trans)
3773{
3774	struct nft_base_chain *basechain;
3775
3776	if (nft_trans_chain_name(trans)[0])
3777		strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
3778
3779	if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
3780		return;
3781
3782	basechain = nft_base_chain(trans->ctx.chain);
3783	nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
3784
3785	switch (nft_trans_chain_policy(trans)) {
3786	case NF_DROP:
3787	case NF_ACCEPT:
3788		basechain->policy = nft_trans_chain_policy(trans);
3789		break;
3790	}
3791}
3792
3793static void nf_tables_commit_release(struct nft_trans *trans)
3794{
3795	switch (trans->msg_type) {
3796	case NFT_MSG_DELTABLE:
3797		nf_tables_table_destroy(&trans->ctx);
3798		break;
3799	case NFT_MSG_DELCHAIN:
3800		nf_tables_chain_destroy(trans->ctx.chain);
3801		break;
3802	case NFT_MSG_DELRULE:
3803		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
3804		break;
3805	case NFT_MSG_DELSET:
3806		nft_set_destroy(nft_trans_set(trans));
3807		break;
3808	case NFT_MSG_DELSETELEM:
3809		nft_set_elem_destroy(nft_trans_elem_set(trans),
3810				     nft_trans_elem(trans).priv);
3811		break;
3812	}
3813	kfree(trans);
3814}
3815
/*
 * Commit every transaction queued on the netns commit list.
 *
 * The function bumps the generation sequence (never leaving it at zero) and
 * switches the generation cursor, then waits for an RCU grace period. It
 * then walks the commit list once: NEW* transactions are activated,
 * notified and disposed of with nft_trans_destroy(); DEL* transactions are
 * notified and unlinked from their containers but stay on the list. After a
 * second grace period the remaining transactions are taken off the list and
 * handed to nf_tables_commit_release(), and a generation notification is
 * sent.
 *
 * Always returns 0.
 */
static int nf_tables_commit(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct nft_trans *trans, *next;
	struct nft_trans_elem *te;

	/* Bump generation counter, invalidate any dump in progress */
	while (++net->nft.base_seq == 0);

	/* A new generation has just started */
	net->nft.gencursor = nft_gencursor_next(net);

	/* Make sure all packets have left the previous generation before
	 * purging old rules.
	 */
	synchronize_rcu();

	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
		switch (trans->msg_type) {
		case NFT_MSG_NEWTABLE:
			if (nft_trans_table_update(trans)) {
				/* Update that does not enable the table:
				 * disable it and mark it dormant.
				 */
				if (!nft_trans_table_enable(trans)) {
					nf_tables_table_disable(trans->ctx.afi,
								trans->ctx.table);
					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
				}
			} else {
				trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
			}
			nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELTABLE:
			/* Transaction is kept for nf_tables_commit_release() */
			nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
			break;
		case NFT_MSG_NEWCHAIN:
			if (nft_trans_chain_update(trans))
				nft_chain_commit_update(trans);
			else
				trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;

			nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELCHAIN:
			nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
			nf_tables_unregister_hooks(trans->ctx.table,
						   trans->ctx.chain,
						   trans->ctx.afi->nops);
			break;
		case NFT_MSG_NEWRULE:
			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
			nf_tables_rule_notify(&trans->ctx,
					      nft_trans_rule(trans),
					      NFT_MSG_NEWRULE);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELRULE:
			/* Unlink now, destroy after the grace period below */
			list_del_rcu(&nft_trans_rule(trans)->list);
			nf_tables_rule_notify(&trans->ctx,
					      nft_trans_rule(trans),
					      NFT_MSG_DELRULE);
			break;
		case NFT_MSG_NEWSET:
			nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
			/* This avoids hitting -EBUSY when deleting the table
			 * from the transaction.
			 */
			if (nft_trans_set(trans)->flags & NFT_SET_ANONYMOUS &&
			    !list_empty(&nft_trans_set(trans)->bindings))
				trans->ctx.table->use--;

			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
					     NFT_MSG_NEWSET, GFP_KERNEL);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELSET:
			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
					     NFT_MSG_DELSET, GFP_KERNEL);
			break;
		case NFT_MSG_NEWSETELEM:
			te = (struct nft_trans_elem *)trans->data;

			te->set->ops->activate(te->set, &te->elem);
			nf_tables_setelem_notify(&trans->ctx, te->set,
						 &te->elem,
						 NFT_MSG_NEWSETELEM, 0);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELSETELEM:
			te = (struct nft_trans_elem *)trans->data;

			nf_tables_setelem_notify(&trans->ctx, te->set,
						 &te->elem,
						 NFT_MSG_DELSETELEM, 0);
			te->set->ops->remove(te->set, &te->elem);
			/* The element is gone: drop it from both the element
			 * count and the count of deactivated elements.
			 */
			atomic_dec(&te->set->nelems);
			te->set->ndeact--;
			break;
		}
	}

	/* Wait for another grace period before the transactions still on
	 * the list are released.
	 */
	synchronize_rcu();

	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
		list_del(&trans->list);
		nf_tables_commit_release(trans);
	}

	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);

	return 0;
}
3929
3930static void nf_tables_abort_release(struct nft_trans *trans)
3931{
3932	switch (trans->msg_type) {
3933	case NFT_MSG_NEWTABLE:
3934		nf_tables_table_destroy(&trans->ctx);
3935		break;
3936	case NFT_MSG_NEWCHAIN:
3937		nf_tables_chain_destroy(trans->ctx.chain);
3938		break;
3939	case NFT_MSG_NEWRULE:
3940		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
3941		break;
3942	case NFT_MSG_NEWSET:
3943		nft_set_destroy(nft_trans_set(trans));
3944		break;
3945	case NFT_MSG_NEWSETELEM:
3946		nft_set_elem_destroy(nft_trans_elem_set(trans),
3947				     nft_trans_elem(trans).priv);
3948		break;
3949	}
3950	kfree(trans);
3951}
3952
/*
 * Undo every transaction queued on the netns commit list.
 *
 * The list is walked once: DEL* transactions and updates have their effect
 * reverted (objects are relinked or reactivated and use counters restored)
 * and are disposed of with nft_trans_destroy(); newly created objects are
 * unlinked but their transactions stay on the list. After an RCU grace
 * period the remaining transactions are taken off the list in reverse order
 * and handed to nf_tables_abort_release().
 *
 * Always returns 0.
 */
static int nf_tables_abort(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct nft_trans *trans, *next;
	struct nft_trans_elem *te;

	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
		switch (trans->msg_type) {
		case NFT_MSG_NEWTABLE:
			if (nft_trans_table_update(trans)) {
				/* An update that enabled the table is undone
				 * by disabling it and marking it dormant.
				 */
				if (nft_trans_table_enable(trans)) {
					nf_tables_table_disable(trans->ctx.afi,
								trans->ctx.table);
					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
				}
				nft_trans_destroy(trans);
			} else {
				/* New table: unlink now, destroy it after
				 * the grace period below.
				 */
				list_del_rcu(&trans->ctx.table->list);
			}
			break;
		case NFT_MSG_DELTABLE:
			list_add_tail_rcu(&trans->ctx.table->list,
					  &trans->ctx.afi->tables);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_NEWCHAIN:
			if (nft_trans_chain_update(trans)) {
				/* Drop the stats carried by the update */
				free_percpu(nft_trans_chain_stats(trans));

				nft_trans_destroy(trans);
			} else {
				trans->ctx.table->use--;
				list_del_rcu(&trans->ctx.chain->list);
				nf_tables_unregister_hooks(trans->ctx.table,
							   trans->ctx.chain,
							   trans->ctx.afi->nops);
			}
			break;
		case NFT_MSG_DELCHAIN:
			trans->ctx.table->use++;
			list_add_tail_rcu(&trans->ctx.chain->list,
					  &trans->ctx.table->chains);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_NEWRULE:
			trans->ctx.chain->use--;
			list_del_rcu(&nft_trans_rule(trans)->list);
			break;
		case NFT_MSG_DELRULE:
			trans->ctx.chain->use++;
			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_NEWSET:
			trans->ctx.table->use--;
			list_del_rcu(&nft_trans_set(trans)->list);
			break;
		case NFT_MSG_DELSET:
			trans->ctx.table->use++;
			list_add_tail_rcu(&nft_trans_set(trans)->list,
					  &trans->ctx.table->sets);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_NEWSETELEM:
			te = (struct nft_trans_elem *)trans->data;

			/* Take the element back out of the set; it is
			 * destroyed by nf_tables_abort_release() below.
			 */
			te->set->ops->remove(te->set, &te->elem);
			atomic_dec(&te->set->nelems);
			break;
		case NFT_MSG_DELSETELEM:
			te = (struct nft_trans_elem *)trans->data;

			/* Reactivate the element and undo the ndeact bump */
			te->set->ops->activate(te->set, &te->elem);
			te->set->ndeact--;

			nft_trans_destroy(trans);
			break;
		}
	}

	/* Wait for a grace period before the unlinked objects are freed */
	synchronize_rcu();

	/* Release in the reverse order of creation */
	list_for_each_entry_safe_reverse(trans, next,
					 &net->nft.commit_list, list) {
		list_del(&trans->list);
		nf_tables_abort_release(trans);
	}

	return 0;
}
4043
/*
 * nfnetlink subsystem descriptor for nf_tables: ties the per-message
 * callback table to the commit and abort handlers of this file.
 */
static const struct nfnetlink_subsystem nf_tables_subsys = {
	.name		= "nf_tables",
	.subsys_id	= NFNL_SUBSYS_NFTABLES,
	.cb_count	= NFT_MSG_MAX,
	.cb		= nf_tables_cb,
	.commit		= nf_tables_commit,
	.abort		= nf_tables_abort,
};
4052
4053int nft_chain_validate_dependency(const struct nft_chain *chain,
4054				  enum nft_chain_type type)
4055{
4056	const struct nft_base_chain *basechain;
4057
4058	if (chain->flags & NFT_BASE_CHAIN) {
4059		basechain = nft_base_chain(chain);
4060		if (basechain->type->type != type)
4061			return -EOPNOTSUPP;
4062	}
4063	return 0;
4064}
4065EXPORT_SYMBOL_GPL(nft_chain_validate_dependency);
4066
4067int nft_chain_validate_hooks(const struct nft_chain *chain,
4068			     unsigned int hook_flags)
4069{
4070	struct nft_base_chain *basechain;
4071
4072	if (chain->flags & NFT_BASE_CHAIN) {
4073		basechain = nft_base_chain(chain);
4074
4075		if ((1 << basechain->ops[0].hooknum) & hook_flags)
4076			return 0;
4077
4078		return -EOPNOTSUPP;
4079	}
4080
4081	return 0;
4082}
4083EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
4084
4085/*
4086 * Loop detection - walk through the ruleset beginning at the destination chain
4087 * of a new jump until either the source chain is reached (loop) or all
4088 * reachable chains have been traversed.
4089 *
4090 * The loop check is performed whenever a new jump verdict is added to an
4091 * expression or verdict map or a verdict map is bound to a new chain.
4092 */
4093
4094static int nf_tables_check_loops(const struct nft_ctx *ctx,
4095				 const struct nft_chain *chain);
4096
4097static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
4098					const struct nft_set *set,
4099					const struct nft_set_iter *iter,
4100					const struct nft_set_elem *elem)
4101{
4102	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
4103	const struct nft_data *data;
4104
4105	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
4106	    *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
4107		return 0;
4108
4109	data = nft_set_ext_data(ext);
4110	switch (data->verdict.code) {
4111	case NFT_JUMP:
4112	case NFT_GOTO:
4113		return nf_tables_check_loops(ctx, data->verdict.chain);
4114	default:
4115		return 0;
4116	}
4117}
4118
/*
 * Recursively check whether ctx->chain can be reached from @chain.
 *
 * Every expression of every rule in @chain that has a validate callback is
 * asked for its data; jump and goto verdicts are followed. Afterwards all
 * verdict maps of the table that are bound to @chain are walked and the
 * verdicts of their elements followed as well.
 *
 * Returns -ELOOP if ctx->chain is reached, a negative error reported by an
 * expression's validate callback or by a set walk, or 0 if no loop was
 * found.
 */
static int nf_tables_check_loops(const struct nft_ctx *ctx,
				 const struct nft_chain *chain)
{
	const struct nft_rule *rule;
	const struct nft_expr *expr, *last;
	const struct nft_set *set;
	struct nft_set_binding *binding;
	struct nft_set_iter iter;

	/* Arrived back at the chain the jump originates from */
	if (ctx->chain == chain)
		return -ELOOP;

	list_for_each_entry(rule, &chain->rules, list) {
		nft_rule_for_each_expr(expr, last, rule) {
			const struct nft_data *data = NULL;
			int err;

			if (!expr->ops->validate)
				continue;

			err = expr->ops->validate(ctx, expr, &data);
			if (err < 0)
				return err;

			/* The expression handed back no data to inspect */
			if (data == NULL)
				continue;

			switch (data->verdict.code) {
			case NFT_JUMP:
			case NFT_GOTO:
				err = nf_tables_check_loops(ctx,
							data->verdict.chain);
				if (err < 0)
					return err;
				/* fall through */
			default:
				break;
			}
		}
	}

	list_for_each_entry(set, &ctx->table->sets, list) {
		/* Only verdict maps can contain jumps */
		if (!(set->flags & NFT_SET_MAP) ||
		    set->dtype != NFT_DATA_VERDICT)
			continue;

		list_for_each_entry(binding, &set->bindings, list) {
			/* Only map bindings that belong to this chain */
			if (!(binding->flags & NFT_SET_MAP) ||
			    binding->chain != chain)
				continue;

			iter.skip 	= 0;
			iter.count	= 0;
			iter.err	= 0;
			iter.fn		= nf_tables_loop_check_setelem;

			set->ops->walk(ctx, set, &iter);
			if (iter.err < 0)
				return iter.err;
		}
	}

	return 0;
}
4182
4183/**
4184 *	nft_parse_register - parse a register value from a netlink attribute
4185 *
4186 *	@attr: netlink attribute
4187 *
4188 *	Parse and translate a register value from a netlink attribute.
4189 *	Registers used to be 128 bit wide, these register numbers will be
4190 *	mapped to the corresponding 32 bit register numbers.
4191 */
4192unsigned int nft_parse_register(const struct nlattr *attr)
4193{
4194	unsigned int reg;
4195
4196	reg = ntohl(nla_get_be32(attr));
4197	switch (reg) {
4198	case NFT_REG_VERDICT...NFT_REG_4:
4199		return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
4200	default:
4201		return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
4202	}
4203}
4204EXPORT_SYMBOL_GPL(nft_parse_register);
4205
4206/**
4207 *	nft_dump_register - dump a register value to a netlink attribute
4208 *
4209 *	@skb: socket buffer
4210 *	@attr: attribute number
4211 *	@reg: register number
4212 *
4213 *	Construct a netlink attribute containing the register number. For
4214 *	compatibility reasons, register numbers being a multiple of 4 are
4215 *	translated to the corresponding 128 bit register numbers.
4216 */
4217int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
4218{
4219	if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
4220		reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
4221	else
4222		reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
4223
4224	return nla_put_be32(skb, attr, htonl(reg));
4225}
4226EXPORT_SYMBOL_GPL(nft_dump_register);
4227
4228/**
4229 *	nft_validate_register_load - validate a load from a register
4230 *
4231 *	@reg: the register number
4232 *	@len: the length of the data
4233 *
4234 * 	Validate that the input register is one of the general purpose
4235 * 	registers and that the length of the load is within the bounds.
4236 */
4237int nft_validate_register_load(enum nft_registers reg, unsigned int len)
4238{
4239	if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
4240		return -EINVAL;
4241	if (len == 0)
4242		return -EINVAL;
4243	if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
4244		return -ERANGE;
4245
4246	return 0;
4247}
4248EXPORT_SYMBOL_GPL(nft_validate_register_load);
4249
4250/**
4251 *	nft_validate_register_store - validate an expressions' register store
4252 *
4253 *	@ctx: context of the expression performing the load
4254 * 	@reg: the destination register number
4255 * 	@data: the data to load
4256 * 	@type: the data type
4257 * 	@len: the length of the data
4258 *
4259 * 	Validate that a data load uses the appropriate data type for
4260 * 	the destination register and the length is within the bounds.
4261 * 	A value of NULL for the data means that its runtime gathered
4262 * 	data.
4263 */
4264int nft_validate_register_store(const struct nft_ctx *ctx,
4265				enum nft_registers reg,
4266				const struct nft_data *data,
4267				enum nft_data_types type, unsigned int len)
4268{
4269	int err;
4270
4271	switch (reg) {
4272	case NFT_REG_VERDICT:
4273		if (type != NFT_DATA_VERDICT)
4274			return -EINVAL;
4275
4276		if (data != NULL &&
4277		    (data->verdict.code == NFT_GOTO ||
4278		     data->verdict.code == NFT_JUMP)) {
4279			err = nf_tables_check_loops(ctx, data->verdict.chain);
4280			if (err < 0)
4281				return err;
4282
4283			if (ctx->chain->level + 1 >
4284			    data->verdict.chain->level) {
4285				if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
4286					return -EMLINK;
4287				data->verdict.chain->level = ctx->chain->level + 1;
4288			}
4289		}
4290
4291		return 0;
4292	default:
4293		if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
4294			return -EINVAL;
4295		if (len == 0)
4296			return -EINVAL;
4297		if (reg * NFT_REG32_SIZE + len >
4298		    FIELD_SIZEOF(struct nft_regs, data))
4299			return -ERANGE;
4300
4301		if (data != NULL && type != NFT_DATA_VALUE)
4302			return -EINVAL;
4303		return 0;
4304	}
4305}
4306EXPORT_SYMBOL_GPL(nft_validate_register_store);
4307
/*
 * Netlink attribute policy for nested verdict attributes: a 32 bit verdict
 * code and a chain name of at most NFT_CHAIN_MAXNAMELEN - 1 bytes.
 */
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
	[NFTA_VERDICT_CODE]	= { .type = NLA_U32 },
	[NFTA_VERDICT_CHAIN]	= { .type = NLA_STRING,
				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
};
4313
4314static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
4315			    struct nft_data_desc *desc, const struct nlattr *nla)
4316{
4317	struct nlattr *tb[NFTA_VERDICT_MAX + 1];
4318	struct nft_chain *chain;
4319	int err;
4320
4321	err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
4322	if (err < 0)
4323		return err;
4324
4325	if (!tb[NFTA_VERDICT_CODE])
4326		return -EINVAL;
4327	data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
4328
4329	switch (data->verdict.code) {
4330	default:
4331		switch (data->verdict.code & NF_VERDICT_MASK) {
4332		case NF_ACCEPT:
4333		case NF_DROP:
4334		case NF_QUEUE:
4335			break;
4336		default:
4337			return -EINVAL;
4338		}
4339		/* fall through */
4340	case NFT_CONTINUE:
4341	case NFT_BREAK:
4342	case NFT_RETURN:
4343		break;
4344	case NFT_JUMP:
4345	case NFT_GOTO:
4346		if (!tb[NFTA_VERDICT_CHAIN])
4347			return -EINVAL;
4348		chain = nf_tables_chain_lookup(ctx->table,
4349					       tb[NFTA_VERDICT_CHAIN]);
4350		if (IS_ERR(chain))
4351			return PTR_ERR(chain);
4352		if (chain->flags & NFT_BASE_CHAIN)
4353			return -EOPNOTSUPP;
4354
4355		chain->use++;
4356		data->verdict.chain = chain;
4357		break;
4358	}
4359
4360	desc->len = sizeof(data->verdict);
4361	desc->type = NFT_DATA_VERDICT;
4362	return 0;
4363}
4364
4365static void nft_verdict_uninit(const struct nft_data *data)
4366{
4367	switch (data->verdict.code) {
4368	case NFT_JUMP:
4369	case NFT_GOTO:
4370		data->verdict.chain->use--;
4371		break;
4372	}
4373}
4374
4375static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
4376{
4377	struct nlattr *nest;
4378
4379	nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
4380	if (!nest)
4381		goto nla_put_failure;
4382
4383	if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
4384		goto nla_put_failure;
4385
4386	switch (data->verdict.code) {
4387	case NFT_JUMP:
4388	case NFT_GOTO:
4389		if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
4390				   data->verdict.chain->name))
4391			goto nla_put_failure;
4392	}
4393	nla_nest_end(skb, nest);
4394	return 0;
4395
4396nla_put_failure:
4397	return -1;
4398}
4399
4400static int nft_value_init(const struct nft_ctx *ctx,
4401			  struct nft_data *data, unsigned int size,
4402			  struct nft_data_desc *desc, const struct nlattr *nla)
4403{
4404	unsigned int len;
4405
4406	len = nla_len(nla);
4407	if (len == 0)
4408		return -EINVAL;
4409	if (len > size)
4410		return -EOVERFLOW;
4411
4412	nla_memcpy(data->data, nla, len);
4413	desc->type = NFT_DATA_VALUE;
4414	desc->len  = len;
4415	return 0;
4416}
4417
4418static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
4419			  unsigned int len)
4420{
4421	return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
4422}
4423
/*
 * Netlink attribute policy for nested data attributes: either a binary
 * value or a nested verdict.
 */
static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
	[NFTA_DATA_VALUE]	= { .type = NLA_BINARY },
	[NFTA_DATA_VERDICT]	= { .type = NLA_NESTED },
};
4428
4429/**
4430 *	nft_data_init - parse nf_tables data netlink attributes
4431 *
4432 *	@ctx: context of the expression using the data
4433 *	@data: destination struct nft_data
4434 *	@size: maximum data length
4435 *	@desc: data description
4436 *	@nla: netlink attribute containing data
4437 *
4438 *	Parse the netlink data attributes and initialize a struct nft_data.
4439 *	The type and length of data are returned in the data description.
4440 *
4441 *	The caller can indicate that it only wants to accept data of type
4442 *	NFT_DATA_VALUE by passing NULL for the ctx argument.
4443 */
4444int nft_data_init(const struct nft_ctx *ctx,
4445		  struct nft_data *data, unsigned int size,
4446		  struct nft_data_desc *desc, const struct nlattr *nla)
4447{
4448	struct nlattr *tb[NFTA_DATA_MAX + 1];
4449	int err;
4450
4451	err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
4452	if (err < 0)
4453		return err;
4454
4455	if (tb[NFTA_DATA_VALUE])
4456		return nft_value_init(ctx, data, size, desc,
4457				      tb[NFTA_DATA_VALUE]);
4458	if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
4459		return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
4460	return -EINVAL;
4461}
4462EXPORT_SYMBOL_GPL(nft_data_init);
4463
4464/**
4465 *	nft_data_uninit - release a nft_data item
4466 *
4467 *	@data: struct nft_data to release
4468 *	@type: type of data
4469 *
4470 *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
4471 *	all others need to be released by calling this function.
4472 */
4473void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
4474{
4475	if (type < NFT_DATA_VERDICT)
4476		return;
4477	switch (type) {
4478	case NFT_DATA_VERDICT:
4479		return nft_verdict_uninit(data);
4480	default:
4481		WARN_ON(1);
4482	}
4483}
4484EXPORT_SYMBOL_GPL(nft_data_uninit);
4485
4486int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
4487		  enum nft_data_types type, unsigned int len)
4488{
4489	struct nlattr *nest;
4490	int err;
4491
4492	nest = nla_nest_start(skb, attr);
4493	if (nest == NULL)
4494		return -1;
4495
4496	switch (type) {
4497	case NFT_DATA_VALUE:
4498		err = nft_value_dump(skb, data, len);
4499		break;
4500	case NFT_DATA_VERDICT:
4501		err = nft_verdict_dump(skb, data);
4502		break;
4503	default:
4504		err = -EINVAL;
4505		WARN_ON(1);
4506	}
4507
4508	nla_nest_end(skb, nest);
4509	return err;
4510}
4511EXPORT_SYMBOL_GPL(nft_data_dump);
4512
4513static int nf_tables_init_net(struct net *net)
4514{
4515	INIT_LIST_HEAD(&net->nft.af_info);
4516	INIT_LIST_HEAD(&net->nft.commit_list);
4517	net->nft.base_seq = 1;
4518	return 0;
4519}
4520
/* Per network namespace operations; only an init hook is provided */
static struct pernet_operations nf_tables_net_ops = {
	.init	= nf_tables_init_net,
};
4524
4525static int __init nf_tables_module_init(void)
4526{
4527	int err;
4528
4529	info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
4530		       GFP_KERNEL);
4531	if (info == NULL) {
4532		err = -ENOMEM;
4533		goto err1;
4534	}
4535
4536	err = nf_tables_core_module_init();
4537	if (err < 0)
4538		goto err2;
4539
4540	err = nfnetlink_subsys_register(&nf_tables_subsys);
4541	if (err < 0)
4542		goto err3;
4543
4544	pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
4545	return register_pernet_subsys(&nf_tables_net_ops);
4546err3:
4547	nf_tables_core_module_exit();
4548err2:
4549	kfree(info);
4550err1:
4551	return err;
4552}
4553
/*
 * Module teardown, undoing nf_tables_module_init() in reverse order: the
 * per network namespace operations and the nfnetlink subsystem are
 * unregistered first, then rcu_barrier() is called before the core is shut
 * down and the 'info' array is freed.
 */
static void __exit nf_tables_module_exit(void)
{
	unregister_pernet_subsys(&nf_tables_net_ops);
	nfnetlink_subsys_unregister(&nf_tables_subsys);
	/* NOTE(review): presumably waits for pending RCU callbacks such as
	 * nft_set_gc_batch_release() before the code goes away - confirm.
	 */
	rcu_barrier();
	nf_tables_core_module_exit();
	kfree(info);
}
4562
4563module_init(nf_tables_module_init);
4564module_exit(nf_tables_module_exit);
4565
4566MODULE_LICENSE("GPL");
4567MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
4568MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
4569