root/net/sched/act_ct.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. tcf_ct_skb_nfct_cached
  2. tcf_ct_skb_network_trim
  3. tcf_ct_skb_nf_family
  4. tcf_ct_ipv4_is_fragment
  5. tcf_ct_ipv6_is_fragment
  6. tcf_ct_handle_fragments
  7. tcf_ct_params_free
  8. ct_nat_execute
  9. tcf_ct_act_set_mark
  10. tcf_ct_act_set_labels
  11. tcf_ct_act_nat
  12. tcf_ct_act
  13. tcf_ct_fill_params_nat
  14. tcf_ct_set_key_val
  15. tcf_ct_fill_params
  16. tcf_ct_init
  17. tcf_ct_cleanup
  18. tcf_ct_dump_key_val
  19. tcf_ct_dump_nat
  20. tcf_ct_dump
  21. tcf_ct_walker
  22. tcf_ct_search
  23. tcf_stats_update
  24. ct_init_net
  25. ct_exit_net
  26. ct_init_module
  27. ct_cleanup_module

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 /* -
   3  * net/sched/act_ct.c  Connection Tracking action
   4  *
   5  * Authors:   Paul Blakey <paulb@mellanox.com>
   6  *            Yossi Kuperman <yossiku@mellanox.com>
   7  *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
   8  */
   9 
  10 #include <linux/module.h>
  11 #include <linux/init.h>
  12 #include <linux/kernel.h>
  13 #include <linux/skbuff.h>
  14 #include <linux/rtnetlink.h>
  15 #include <linux/pkt_cls.h>
  16 #include <linux/ip.h>
  17 #include <linux/ipv6.h>
  18 #include <net/netlink.h>
  19 #include <net/pkt_sched.h>
  20 #include <net/pkt_cls.h>
  21 #include <net/act_api.h>
  22 #include <net/ip.h>
  23 #include <net/ipv6_frag.h>
  24 #include <uapi/linux/tc_act/tc_ct.h>
  25 #include <net/tc_act/tc_ct.h>
  26 
  27 #include <net/netfilter/nf_conntrack.h>
  28 #include <net/netfilter/nf_conntrack_core.h>
  29 #include <net/netfilter/nf_conntrack_zones.h>
  30 #include <net/netfilter/nf_conntrack_helper.h>
  31 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
  32 #include <uapi/linux/netfilter/nf_nat.h>
  33 
   /* Ops table of this action. Only declared at this point; tcf_ct_init()
    * passes its address to tcf_idr_create().
    */
   34 static struct tc_action_ops act_ct_ops;
   /* Id handed to net_generic() to fetch this action's per-netns data. */
   35 static unsigned int ct_net_id;
  36 
   /* Per-network-namespace data of the ct action, looked up with
    * net_generic(net, ct_net_id).
    */
   37 struct tc_ct_action_net {
              /* tcf_ct_init() uses the same net_generic() pointer as a
               * struct tc_action_net, which is why this member has to stay
               * at offset zero.
               */
   38         struct tc_action_net tn; /* Must be first */
              /* When false, tcf_ct_fill_params() refuses TCA_CT_LABELS with
               * "Failed to set connlabel length".
               */
   39         bool labels;
   40 };
  41 
  42 /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */
  43 static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb,
  44                                    u16 zone_id, bool force)
  45 {
  46         enum ip_conntrack_info ctinfo;
  47         struct nf_conn *ct;
  48 
  49         ct = nf_ct_get(skb, &ctinfo);
  50         if (!ct)
  51                 return false;
  52         if (!net_eq(net, read_pnet(&ct->ct_net)))
  53                 return false;
  54         if (nf_ct_zone(ct)->id != zone_id)
  55                 return false;
  56 
  57         /* Force conntrack entry direction. */
  58         if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
  59                 if (nf_ct_is_confirmed(ct))
  60                         nf_ct_kill(ct);
  61 
  62                 nf_conntrack_put(&ct->ct_general);
  63                 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
  64 
  65                 return false;
  66         }
  67 
  68         return true;
  69 }
  70 
  71 /* Trim the skb to the length specified by the IP/IPv6 header,
  72  * removing any trailing lower-layer padding. This prepares the skb
  73  * for higher-layer processing that assumes skb->len excludes padding
  74  * (such as nf_ip_checksum). The caller needs to pull the skb to the
  75  * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
  76  */
  77 static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
  78 {
  79         unsigned int len;
  80         int err;
  81 
  82         switch (family) {
  83         case NFPROTO_IPV4:
  84                 len = ntohs(ip_hdr(skb)->tot_len);
  85                 break;
  86         case NFPROTO_IPV6:
  87                 len = sizeof(struct ipv6hdr)
  88                         + ntohs(ipv6_hdr(skb)->payload_len);
  89                 break;
  90         default:
  91                 len = skb->len;
  92         }
  93 
  94         err = pskb_trim_rcsum(skb, len);
  95 
  96         return err;
  97 }
  98 
  99 static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
 100 {
 101         u8 family = NFPROTO_UNSPEC;
 102 
 103         switch (skb->protocol) {
 104         case htons(ETH_P_IP):
 105                 family = NFPROTO_IPV4;
 106                 break;
 107         case htons(ETH_P_IPV6):
 108                 family = NFPROTO_IPV6;
 109                 break;
 110         default:
 111                 break;
 112         }
 113 
 114         return family;
 115 }
 116 
 117 static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag)
 118 {
 119         unsigned int len;
 120 
 121         len =  skb_network_offset(skb) + sizeof(struct iphdr);
 122         if (unlikely(skb->len < len))
 123                 return -EINVAL;
 124         if (unlikely(!pskb_may_pull(skb, len)))
 125                 return -ENOMEM;
 126 
 127         *frag = ip_is_fragment(ip_hdr(skb));
 128         return 0;
 129 }
 130 
 131 static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag)
 132 {
 133         unsigned int flags = 0, len, payload_ofs = 0;
 134         unsigned short frag_off;
 135         int nexthdr;
 136 
 137         len =  skb_network_offset(skb) + sizeof(struct ipv6hdr);
 138         if (unlikely(skb->len < len))
 139                 return -EINVAL;
 140         if (unlikely(!pskb_may_pull(skb, len)))
 141                 return -ENOMEM;
 142 
 143         nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags);
 144         if (unlikely(nexthdr < 0))
 145                 return -EPROTO;
 146 
 147         *frag = flags & IP6_FH_F_FRAG;
 148         return 0;
 149 }
 150 
 151 static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 152                                    u8 family, u16 zone)
 153 {
 154         enum ip_conntrack_info ctinfo;
 155         struct nf_conn *ct;
 156         int err = 0;
 157         bool frag;
 158 
 159         /* Previously seen (loopback)? Ignore. */
 160         ct = nf_ct_get(skb, &ctinfo);
 161         if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
 162                 return 0;
 163 
 164         if (family == NFPROTO_IPV4)
 165                 err = tcf_ct_ipv4_is_fragment(skb, &frag);
 166         else
 167                 err = tcf_ct_ipv6_is_fragment(skb, &frag);
 168         if (err || !frag)
 169                 return err;
 170 
 171         skb_get(skb);
 172 
 173         if (family == NFPROTO_IPV4) {
 174                 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
 175 
 176                 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 177                 local_bh_disable();
 178                 err = ip_defrag(net, skb, user);
 179                 local_bh_enable();
 180                 if (err && err != -EINPROGRESS)
 181                         goto out_free;
 182         } else { /* NFPROTO_IPV6 */
 183 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 184                 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
 185 
 186                 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
 187                 err = nf_ct_frag6_gather(net, skb, user);
 188                 if (err && err != -EINPROGRESS)
 189                         goto out_free;
 190 #else
 191                 err = -EOPNOTSUPP;
 192                 goto out_free;
 193 #endif
 194         }
 195 
 196         skb_clear_hash(skb);
 197         skb->ignore_df = 1;
 198         return err;
 199 
 200 out_free:
 201         kfree_skb(skb);
 202         return err;
 203 }
 204 
 205 static void tcf_ct_params_free(struct rcu_head *head)
 206 {
 207         struct tcf_ct_params *params = container_of(head,
 208                                                     struct tcf_ct_params, rcu);
 209 
 210         if (params->tmpl)
 211                 nf_conntrack_put(&params->tmpl->ct_general);
 212         kfree(params);
 213 }
 214 
  215 #if IS_ENABLED(CONFIG_NF_NAT)
  216 /* Modelled after nf_nat_ipv[46]_fn().
  217  * range is only used for new, uninitialized NAT state.
  218  * Returns either NF_ACCEPT or NF_DROP.
       *
       * @skb:       packet to translate
       * @ct:        conntrack entry of the packet
       * @ctinfo:    conntrack state of the packet; selects the path taken below
       * @range:     NAT range from the action; may be NULL, in which case a null
       *             binding is allocated for uninitialized NAT state
       * @maniptype: NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
       *
       * A verdict other than NF_ACCEPT coming back from the NAT setup call is
       * handed to the caller unchanged.
  219  */
  220 static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
  221                           enum ip_conntrack_info ctinfo,
  222                           const struct nf_nat_range2 *range,
  223                           enum nf_nat_manip_type maniptype)
  224 {
  225         int hooknum, err = NF_ACCEPT;
  226 
  227         /* See HOOK2MANIP(). */
  228         if (maniptype == NF_NAT_MANIP_SRC)
  229                 hooknum = NF_INET_LOCAL_IN; /* Source NAT */
  230         else
  231                 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
  232 
  233         switch (ctinfo) {
  234         case IP_CT_RELATED:
  235         case IP_CT_RELATED_REPLY:
                      /* Related ICMP/ICMPv6 packets are handled entirely by the
                       * reply-translation helpers and skip nf_nat_packet(); a zero
                       * return from the helper becomes NF_DROP.
                       */
  236                 if (skb->protocol == htons(ETH_P_IP) &&
  237                     ip_hdr(skb)->protocol == IPPROTO_ICMP) {
  238                         if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
  239                                                            hooknum))
  240                                 err = NF_DROP;
  241                         goto out;
  242                 } else if (IS_ENABLED(CONFIG_IPV6) &&
  243                            skb->protocol == htons(ETH_P_IPV6)) {
  244                         __be16 frag_off;
  245                         u8 nexthdr = ipv6_hdr(skb)->nexthdr;
  246                         int hdrlen = ipv6_skip_exthdr(skb,
  247                                                       sizeof(struct ipv6hdr),
  248                                                       &nexthdr, &frag_off);
  249 
  250                         if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
  251                                 if (!nf_nat_icmpv6_reply_translation(skb, ct,
  252                                                                      ctinfo,
  253                                                                      hooknum,
  254                                                                      hdrlen))
  255                                         err = NF_DROP;
  256                                 goto out;
  257                         }
  258                 }
  259                 /* Non-ICMP, fall thru to initialize if needed. */
  260                 /* fall through */
  261         case IP_CT_NEW:
  262                 /* Seen it before?  This can happen for loopback, retrans,
  263                  * or local packets.
  264                  */
  265                 if (!nf_nat_initialized(ct, maniptype)) {
  266                         /* Initialize according to the NAT action. */
  267                         err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
  268                                 /* Action is set up to establish a new
  269                                  * mapping.
  270                                  */
  271                                 ? nf_nat_setup_info(ct, range, maniptype)
  272                                 : nf_nat_alloc_null_binding(ct, hooknum);
  273                         if (err != NF_ACCEPT)
  274                                 goto out;
  275                 }
  276                 break;
  277 
              /* Established traffic needs no setup; go straight to
               * nf_nat_packet().
               */
  278         case IP_CT_ESTABLISHED:
  279         case IP_CT_ESTABLISHED_REPLY:
  280                 break;
  281 
              /* Any other ctinfo value is dropped. */
  282         default:
  283                 err = NF_DROP;
  284                 goto out;
  285         }
  286 
              /* nf_nat_packet()'s return value becomes the verdict. */
  287         err = nf_nat_packet(ct, ctinfo, hooknum, skb);
  288 out:
  289         return err;
  290 }
  291 #endif /* CONFIG_NF_NAT */
 292 
 293 static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask)
 294 {
 295 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
 296         u32 new_mark;
 297 
 298         if (!mask)
 299                 return;
 300 
 301         new_mark = mark | (ct->mark & ~(mask));
 302         if (ct->mark != new_mark) {
 303                 ct->mark = new_mark;
 304                 if (nf_ct_is_confirmed(ct))
 305                         nf_conntrack_event_cache(IPCT_MARK, ct);
 306         }
 307 #endif
 308 }
 309 
 310 static void tcf_ct_act_set_labels(struct nf_conn *ct,
 311                                   u32 *labels,
 312                                   u32 *labels_m)
 313 {
 314 #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
 315         size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels);
 316 
 317         if (!memchr_inv(labels_m, 0, labels_sz))
 318                 return;
 319 
 320         nf_connlabels_replace(ct, labels, labels_m, 4);
 321 #endif
 322 }
 323 
 324 static int tcf_ct_act_nat(struct sk_buff *skb,
 325                           struct nf_conn *ct,
 326                           enum ip_conntrack_info ctinfo,
 327                           int ct_action,
 328                           struct nf_nat_range2 *range,
 329                           bool commit)
 330 {
 331 #if IS_ENABLED(CONFIG_NF_NAT)
 332         int err;
 333         enum nf_nat_manip_type maniptype;
 334 
 335         if (!(ct_action & TCA_CT_ACT_NAT))
 336                 return NF_ACCEPT;
 337 
 338         /* Add NAT extension if not confirmed yet. */
 339         if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
 340                 return NF_DROP;   /* Can't NAT. */
 341 
 342         if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) &&
 343             (ctinfo != IP_CT_RELATED || commit)) {
 344                 /* NAT an established or related connection like before. */
 345                 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
 346                         /* This is the REPLY direction for a connection
 347                          * for which NAT was applied in the forward
 348                          * direction.  Do the reverse NAT.
 349                          */
 350                         maniptype = ct->status & IPS_SRC_NAT
 351                                 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
 352                 else
 353                         maniptype = ct->status & IPS_SRC_NAT
 354                                 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
 355         } else if (ct_action & TCA_CT_ACT_NAT_SRC) {
 356                 maniptype = NF_NAT_MANIP_SRC;
 357         } else if (ct_action & TCA_CT_ACT_NAT_DST) {
 358                 maniptype = NF_NAT_MANIP_DST;
 359         } else {
 360                 return NF_ACCEPT;
 361         }
 362 
 363         err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
 364         if (err == NF_ACCEPT &&
 365             ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
 366                 if (maniptype == NF_NAT_MANIP_SRC)
 367                         maniptype = NF_NAT_MANIP_DST;
 368                 else
 369                         maniptype = NF_NAT_MANIP_SRC;
 370 
 371                 err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
 372         }
 373         return err;
 374 #else
 375         return NF_ACCEPT;
 376 #endif
 377 }
 378 
 379 static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 380                       struct tcf_result *res)
 381 {
 382         struct net *net = dev_net(skb->dev);
 383         bool cached, commit, clear, force;
 384         enum ip_conntrack_info ctinfo;
 385         struct tcf_ct *c = to_ct(a);
 386         struct nf_conn *tmpl = NULL;
 387         struct nf_hook_state state;
 388         int nh_ofs, err, retval;
 389         struct tcf_ct_params *p;
 390         struct nf_conn *ct;
 391         u8 family;
 392 
 393         p = rcu_dereference_bh(c->params);
 394 
 395         retval = READ_ONCE(c->tcf_action);
 396         commit = p->ct_action & TCA_CT_ACT_COMMIT;
 397         clear = p->ct_action & TCA_CT_ACT_CLEAR;
 398         force = p->ct_action & TCA_CT_ACT_FORCE;
 399         tmpl = p->tmpl;
 400 
 401         if (clear) {
 402                 ct = nf_ct_get(skb, &ctinfo);
 403                 if (ct) {
 404                         nf_conntrack_put(&ct->ct_general);
 405                         nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
 406                 }
 407 
 408                 goto out;
 409         }
 410 
 411         family = tcf_ct_skb_nf_family(skb);
 412         if (family == NFPROTO_UNSPEC)
 413                 goto drop;
 414 
 415         /* The conntrack module expects to be working at L3.
 416          * We also try to pull the IPv4/6 header to linear area
 417          */
 418         nh_ofs = skb_network_offset(skb);
 419         skb_pull_rcsum(skb, nh_ofs);
 420         err = tcf_ct_handle_fragments(net, skb, family, p->zone);
 421         if (err == -EINPROGRESS) {
 422                 retval = TC_ACT_STOLEN;
 423                 goto out;
 424         }
 425         if (err)
 426                 goto drop;
 427 
 428         err = tcf_ct_skb_network_trim(skb, family);
 429         if (err)
 430                 goto drop;
 431 
 432         /* If we are recirculating packets to match on ct fields and
 433          * committing with a separate ct action, then we don't need to
 434          * actually run the packet through conntrack twice unless it's for a
 435          * different zone.
 436          */
 437         cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
 438         if (!cached) {
 439                 /* Associate skb with specified zone. */
 440                 if (tmpl) {
 441                         ct = nf_ct_get(skb, &ctinfo);
 442                         if (skb_nfct(skb))
 443                                 nf_conntrack_put(skb_nfct(skb));
 444                         nf_conntrack_get(&tmpl->ct_general);
 445                         nf_ct_set(skb, tmpl, IP_CT_NEW);
 446                 }
 447 
 448                 state.hook = NF_INET_PRE_ROUTING;
 449                 state.net = net;
 450                 state.pf = family;
 451                 err = nf_conntrack_in(skb, &state);
 452                 if (err != NF_ACCEPT)
 453                         goto out_push;
 454         }
 455 
 456         ct = nf_ct_get(skb, &ctinfo);
 457         if (!ct)
 458                 goto out_push;
 459         nf_ct_deliver_cached_events(ct);
 460 
 461         err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
 462         if (err != NF_ACCEPT)
 463                 goto drop;
 464 
 465         if (commit) {
 466                 tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
 467                 tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
 468 
 469                 /* This will take care of sending queued events
 470                  * even if the connection is already confirmed.
 471                  */
 472                 nf_conntrack_confirm(skb);
 473         }
 474 
 475 out_push:
 476         skb_push_rcsum(skb, nh_ofs);
 477 
 478 out:
 479         bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
 480         return retval;
 481 
 482 drop:
 483         qstats_drop_inc(this_cpu_ptr(a->cpu_qstats));
 484         return TC_ACT_SHOT;
 485 }
 486 
  /* Netlink attribute policy for the ct action; tcf_ct_init() passes it to
   * nla_parse_nested(). TCA_CT_PARMS must be exactly a struct tc_ct, the label
   * attributes are binary blobs with a length bound of 128 bits, and the IPv6
   * NAT bounds must be exactly sizeof(struct in6_addr).
   */
  487 static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
  488         [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 },
  489         [TCA_CT_ACTION] = { .type = NLA_U16 },
  490         [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
  491         [TCA_CT_ZONE] = { .type = NLA_U16 },
  492         [TCA_CT_MARK] = { .type = NLA_U32 },
  493         [TCA_CT_MARK_MASK] = { .type = NLA_U32 },
  494         [TCA_CT_LABELS] = { .type = NLA_BINARY,
  495                             .len = 128 / BITS_PER_BYTE },
  496         [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY,
  497                                  .len = 128 / BITS_PER_BYTE },
  498         [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 },
  499         [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 },
  500         [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN,
  501                                   .len = sizeof(struct in6_addr) },
  502         [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN,
  503                                    .len = sizeof(struct in6_addr) },
  504         [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 },
  505         [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 },
  506 };
 507 
 508 static int tcf_ct_fill_params_nat(struct tcf_ct_params *p,
 509                                   struct tc_ct *parm,
 510                                   struct nlattr **tb,
 511                                   struct netlink_ext_ack *extack)
 512 {
 513         struct nf_nat_range2 *range;
 514 
 515         if (!(p->ct_action & TCA_CT_ACT_NAT))
 516                 return 0;
 517 
 518         if (!IS_ENABLED(CONFIG_NF_NAT)) {
 519                 NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel");
 520                 return -EOPNOTSUPP;
 521         }
 522 
 523         if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
 524                 return 0;
 525 
 526         if ((p->ct_action & TCA_CT_ACT_NAT_SRC) &&
 527             (p->ct_action & TCA_CT_ACT_NAT_DST)) {
 528                 NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time");
 529                 return -EOPNOTSUPP;
 530         }
 531 
 532         range = &p->range;
 533         if (tb[TCA_CT_NAT_IPV4_MIN]) {
 534                 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX];
 535 
 536                 p->ipv4_range = true;
 537                 range->flags |= NF_NAT_RANGE_MAP_IPS;
 538                 range->min_addr.ip =
 539                         nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]);
 540 
 541                 range->max_addr.ip = max_attr ?
 542                                      nla_get_in_addr(max_attr) :
 543                                      range->min_addr.ip;
 544         } else if (tb[TCA_CT_NAT_IPV6_MIN]) {
 545                 struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX];
 546 
 547                 p->ipv4_range = false;
 548                 range->flags |= NF_NAT_RANGE_MAP_IPS;
 549                 range->min_addr.in6 =
 550                         nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]);
 551 
 552                 range->max_addr.in6 = max_attr ?
 553                                       nla_get_in6_addr(max_attr) :
 554                                       range->min_addr.in6;
 555         }
 556 
 557         if (tb[TCA_CT_NAT_PORT_MIN]) {
 558                 range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 559                 range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]);
 560 
 561                 range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ?
 562                                        nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) :
 563                                        range->min_proto.all;
 564         }
 565 
 566         return 0;
 567 }
 568 
 569 static void tcf_ct_set_key_val(struct nlattr **tb,
 570                                void *val, int val_type,
 571                                void *mask, int mask_type,
 572                                int len)
 573 {
 574         if (!tb[val_type])
 575                 return;
 576         nla_memcpy(val, tb[val_type], len);
 577 
 578         if (!mask)
 579                 return;
 580 
 581         if (mask_type == TCA_CT_UNSPEC || !tb[mask_type])
 582                 memset(mask, 0xff, len);
 583         else
 584                 nla_memcpy(mask, tb[mask_type], len);
 585 }
 586 
 587 static int tcf_ct_fill_params(struct net *net,
 588                               struct tcf_ct_params *p,
 589                               struct tc_ct *parm,
 590                               struct nlattr **tb,
 591                               struct netlink_ext_ack *extack)
 592 {
 593         struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
 594         struct nf_conntrack_zone zone;
 595         struct nf_conn *tmpl;
 596         int err;
 597 
 598         p->zone = NF_CT_DEFAULT_ZONE_ID;
 599 
 600         tcf_ct_set_key_val(tb,
 601                            &p->ct_action, TCA_CT_ACTION,
 602                            NULL, TCA_CT_UNSPEC,
 603                            sizeof(p->ct_action));
 604 
 605         if (p->ct_action & TCA_CT_ACT_CLEAR)
 606                 return 0;
 607 
 608         err = tcf_ct_fill_params_nat(p, parm, tb, extack);
 609         if (err)
 610                 return err;
 611 
 612         if (tb[TCA_CT_MARK]) {
 613                 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) {
 614                         NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled.");
 615                         return -EOPNOTSUPP;
 616                 }
 617                 tcf_ct_set_key_val(tb,
 618                                    &p->mark, TCA_CT_MARK,
 619                                    &p->mark_mask, TCA_CT_MARK_MASK,
 620                                    sizeof(p->mark));
 621         }
 622 
 623         if (tb[TCA_CT_LABELS]) {
 624                 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) {
 625                         NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled.");
 626                         return -EOPNOTSUPP;
 627                 }
 628 
 629                 if (!tn->labels) {
 630                         NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length");
 631                         return -EOPNOTSUPP;
 632                 }
 633                 tcf_ct_set_key_val(tb,
 634                                    p->labels, TCA_CT_LABELS,
 635                                    p->labels_mask, TCA_CT_LABELS_MASK,
 636                                    sizeof(p->labels));
 637         }
 638 
 639         if (tb[TCA_CT_ZONE]) {
 640                 if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
 641                         NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled.");
 642                         return -EOPNOTSUPP;
 643                 }
 644 
 645                 tcf_ct_set_key_val(tb,
 646                                    &p->zone, TCA_CT_ZONE,
 647                                    NULL, TCA_CT_UNSPEC,
 648                                    sizeof(p->zone));
 649         }
 650 
 651         if (p->zone == NF_CT_DEFAULT_ZONE_ID)
 652                 return 0;
 653 
 654         nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
 655         tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
 656         if (!tmpl) {
 657                 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template");
 658                 return -ENOMEM;
 659         }
 660         __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
 661         nf_conntrack_get(&tmpl->ct_general);
 662         p->tmpl = tmpl;
 663 
 664         return 0;
 665 }
 666 
  /* Create a ct action or replace the parameters of an existing one from the
   * nested netlink attributes in @nla.
   *
   * The attributes are validated against ct_policy and TCA_CT_PARMS is
   * mandatory. A fresh parameter block is allocated and filled, then swapped
   * in under the action's tcf_lock; the block that was installed before is
   * freed through call_rcu().
   *
   * Returns ACT_P_CREATED when a new action was created, 0 when an existing
   * action was bound or updated, or a negative errno (-EINVAL for missing
   * attributes, -EEXIST when the action exists and @replace is not set,
   * -ENOMEM, or an error from one of the helpers).
   */
  667 static int tcf_ct_init(struct net *net, struct nlattr *nla,
  668                        struct nlattr *est, struct tc_action **a,
  669                        int replace, int bind, bool rtnl_held,
  670                        struct tcf_proto *tp,
  671                        struct netlink_ext_ack *extack)
  672 {
  673         struct tc_action_net *tn = net_generic(net, ct_net_id);
  674         struct tcf_ct_params *params = NULL;
  675         struct nlattr *tb[TCA_CT_MAX + 1];
  676         struct tcf_chain *goto_ch = NULL;
  677         struct tc_ct *parm;
  678         struct tcf_ct *c;
  679         int err, res = 0;
  680         u32 index;
  681 
  682         if (!nla) {
  683                 NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed");
  684                 return -EINVAL;
  685         }
  686 
  687         err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack);
  688         if (err < 0)
  689                 return err;
  690 
  691         if (!tb[TCA_CT_PARMS]) {
  692                 NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters");
  693                 return -EINVAL;
  694         }
  695         parm = nla_data(tb[TCA_CT_PARMS]);
  696         index = parm->index;
  697         err = tcf_idr_check_alloc(tn, &index, a, bind);
  698         if (err < 0)
  699                 return err;
  700 
              /* err == 0: no action with this index yet, create one.
               * Otherwise an existing action was found.
               */
  701         if (!err) {
  702                 err = tcf_idr_create(tn, index, est, a,
  703                                      &act_ct_ops, bind, true);
  704                 if (err) {
  705                         tcf_idr_cleanup(tn, index);
  706                         return err;
  707                 }
  708                 res = ACT_P_CREATED;
  709         } else {
  710                 if (bind)
  711                         return 0;
  712 
  713                 if (!replace) {
  714                         tcf_idr_release(*a, bind);
  715                         return -EEXIST;
  716                 }
  717         }
  718         err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
  719         if (err < 0)
  720                 goto cleanup;
  721 
  722         c = to_ct(*a);
  723 
  724         params = kzalloc(sizeof(*params), GFP_KERNEL);
  725         if (unlikely(!params)) {
  726                 err = -ENOMEM;
  727                 goto cleanup;
  728         }
  729 
  730         err = tcf_ct_fill_params(net, params, parm, tb, extack);
  731         if (err)
  732                 goto cleanup;
  733 
              /* Install the new control action and parameters together. After
               * the swap, goto_ch and params hold the previous values.
               */
  734         spin_lock_bh(&c->tcf_lock);
  735         goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
  736         rcu_swap_protected(c->params, params, lockdep_is_held(&c->tcf_lock));
  737         spin_unlock_bh(&c->tcf_lock);
  738 
  739         if (goto_ch)
  740                 tcf_chain_put_by_act(goto_ch);
              /* params is now the old block (presumably NULL for a newly
               * created action); free it after a grace period.
               */
  741         if (params)
  742                 call_rcu(&params->rcu, tcf_ct_params_free);
  743         if (res == ACT_P_CREATED)
  744                 tcf_idr_insert(tn, *a);
  745 
  746         return res;
  747 
              /* Error exit: params here is still the block allocated above (or
               * NULL), never an installed one, so a plain kfree() is used.
               */
  748 cleanup:
  749         if (goto_ch)
  750                 tcf_chain_put_by_act(goto_ch);
  751         kfree(params);
  752         tcf_idr_release(*a, bind);
  753         return err;
  754 }
 755 
 756 static void tcf_ct_cleanup(struct tc_action *a)
 757 {
 758         struct tcf_ct_params *params;
 759         struct tcf_ct *c = to_ct(a);
 760 
 761         params = rcu_dereference_protected(c->params, 1);
 762         if (params)
 763                 call_rcu(&params->rcu, tcf_ct_params_free);
 764 }
 765 
 766 static int tcf_ct_dump_key_val(struct sk_buff *skb,
 767                                void *val, int val_type,
 768                                void *mask, int mask_type,
 769                                int len)
 770 {
 771         int err;
 772 
 773         if (mask && !memchr_inv(mask, 0, len))
 774                 return 0;
 775 
 776         err = nla_put(skb, val_type, len, val);
 777         if (err)
 778                 return err;
 779 
 780         if (mask_type != TCA_CT_UNSPEC) {
 781                 err = nla_put(skb, mask_type, len, mask);
 782                 if (err)
 783                         return err;
 784         }
 785 
 786         return 0;
 787 }
 788 
 789 static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
 790 {
 791         struct nf_nat_range2 *range = &p->range;
 792 
 793         if (!(p->ct_action & TCA_CT_ACT_NAT))
 794                 return 0;
 795 
 796         if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST)))
 797                 return 0;
 798 
 799         if (range->flags & NF_NAT_RANGE_MAP_IPS) {
 800                 if (p->ipv4_range) {
 801                         if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN,
 802                                             range->min_addr.ip))
 803                                 return -1;
 804                         if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX,
 805                                             range->max_addr.ip))
 806                                 return -1;
 807                 } else {
 808                         if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN,
 809                                              &range->min_addr.in6))
 810                                 return -1;
 811                         if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX,
 812                                              &range->max_addr.in6))
 813                                 return -1;
 814                 }
 815         }
 816 
 817         if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 818                 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN,
 819                                  range->min_proto.all))
 820                         return -1;
 821                 if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX,
 822                                  range->max_proto.all))
 823                         return -1;
 824         }
 825 
 826         return 0;
 827 }
 828 
 829 static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
 830                               int bind, int ref)
 831 {
 832         unsigned char *b = skb_tail_pointer(skb);
 833         struct tcf_ct *c = to_ct(a);
 834         struct tcf_ct_params *p;
 835 
 836         struct tc_ct opt = {
 837                 .index   = c->tcf_index,
 838                 .refcnt  = refcount_read(&c->tcf_refcnt) - ref,
 839                 .bindcnt = atomic_read(&c->tcf_bindcnt) - bind,
 840         };
 841         struct tcf_t t;
 842 
 843         spin_lock_bh(&c->tcf_lock);
 844         p = rcu_dereference_protected(c->params,
 845                                       lockdep_is_held(&c->tcf_lock));
 846         opt.action = c->tcf_action;
 847 
 848         if (tcf_ct_dump_key_val(skb,
 849                                 &p->ct_action, TCA_CT_ACTION,
 850                                 NULL, TCA_CT_UNSPEC,
 851                                 sizeof(p->ct_action)))
 852                 goto nla_put_failure;
 853 
 854         if (p->ct_action & TCA_CT_ACT_CLEAR)
 855                 goto skip_dump;
 856 
 857         if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
 858             tcf_ct_dump_key_val(skb,
 859                                 &p->mark, TCA_CT_MARK,
 860                                 &p->mark_mask, TCA_CT_MARK_MASK,
 861                                 sizeof(p->mark)))
 862                 goto nla_put_failure;
 863 
 864         if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
 865             tcf_ct_dump_key_val(skb,
 866                                 p->labels, TCA_CT_LABELS,
 867                                 p->labels_mask, TCA_CT_LABELS_MASK,
 868                                 sizeof(p->labels)))
 869                 goto nla_put_failure;
 870 
 871         if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
 872             tcf_ct_dump_key_val(skb,
 873                                 &p->zone, TCA_CT_ZONE,
 874                                 NULL, TCA_CT_UNSPEC,
 875                                 sizeof(p->zone)))
 876                 goto nla_put_failure;
 877 
 878         if (tcf_ct_dump_nat(skb, p))
 879                 goto nla_put_failure;
 880 
 881 skip_dump:
 882         if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt))
 883                 goto nla_put_failure;
 884 
 885         tcf_tm_dump(&t, &c->tcf_tm);
 886         if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
 887                 goto nla_put_failure;
 888         spin_unlock_bh(&c->tcf_lock);
 889 
 890         return skb->len;
 891 nla_put_failure:
 892         spin_unlock_bh(&c->tcf_lock);
 893         nlmsg_trim(skb, b);
 894         return -1;
 895 }
 896 
 897 static int tcf_ct_walker(struct net *net, struct sk_buff *skb,
 898                          struct netlink_callback *cb, int type,
 899                          const struct tc_action_ops *ops,
 900                          struct netlink_ext_ack *extack)
 901 {
 902         struct tc_action_net *tn = net_generic(net, ct_net_id);
 903 
 904         return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 905 }
 906 
 907 static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index)
 908 {
 909         struct tc_action_net *tn = net_generic(net, ct_net_id);
 910 
 911         return tcf_idr_search(tn, a, index);
 912 }
 913 
 914 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
 915                              u64 lastuse, bool hw)
 916 {
 917         struct tcf_ct *c = to_ct(a);
 918 
 919         _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
 920 
 921         if (hw)
 922                 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
 923                                    bytes, packets);
 924         c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse);
 925 }
 926 
 927 static struct tc_action_ops act_ct_ops = {
 928         .kind           =       "ct",
 929         .id             =       TCA_ID_CT,
 930         .owner          =       THIS_MODULE,
 931         .act            =       tcf_ct_act,
 932         .dump           =       tcf_ct_dump,
 933         .init           =       tcf_ct_init,
 934         .cleanup        =       tcf_ct_cleanup,
 935         .walk           =       tcf_ct_walker,
 936         .lookup         =       tcf_ct_search,
 937         .stats_update   =       tcf_stats_update,
 938         .size           =       sizeof(struct tcf_ct),
 939 };
 940 
 941 static __net_init int ct_init_net(struct net *net)
 942 {
 943         unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8;
 944         struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
 945 
 946         if (nf_connlabels_get(net, n_bits - 1)) {
 947                 tn->labels = false;
 948                 pr_err("act_ct: Failed to set connlabels length");
 949         } else {
 950                 tn->labels = true;
 951         }
 952 
 953         return tc_action_net_init(net, &tn->tn, &act_ct_ops);
 954 }
 955 
 956 static void __net_exit ct_exit_net(struct list_head *net_list)
 957 {
 958         struct net *net;
 959 
 960         rtnl_lock();
 961         list_for_each_entry(net, net_list, exit_list) {
 962                 struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
 963 
 964                 if (tn->labels)
 965                         nf_connlabels_put(net);
 966         }
 967         rtnl_unlock();
 968 
 969         tc_action_net_exit(net_list, ct_net_id);
 970 }
 971 
 972 static struct pernet_operations ct_net_ops = {
 973         .init = ct_init_net,
 974         .exit_batch = ct_exit_net,
 975         .id   = &ct_net_id,
 976         .size = sizeof(struct tc_ct_action_net),
 977 };
 978 
 979 static int __init ct_init_module(void)
 980 {
 981         return tcf_register_action(&act_ct_ops, &ct_net_ops);
 982 }
 983 
 984 static void __exit ct_cleanup_module(void)
 985 {
 986         tcf_unregister_action(&act_ct_ops, &ct_net_ops);
 987 }
 988 
 989 module_init(ct_init_module);
 990 module_exit(ct_cleanup_module);
 991 MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
 992 MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>");
 993 MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>");
 994 MODULE_DESCRIPTION("Connection tracking action");
 995 MODULE_LICENSE("GPL v2");
 996 

/* [<][>][^][v][top][bottom][index][help] */