1/* 2 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 */ 9 10#include <linux/module.h> 11#include <linux/init.h> 12#include <linux/kernel.h> 13#include <linux/skbuff.h> 14#include <linux/rtnetlink.h> 15#include <linux/filter.h> 16#include <linux/bpf.h> 17 18#include <net/netlink.h> 19#include <net/pkt_sched.h> 20 21#include <linux/tc_act/tc_bpf.h> 22#include <net/tc_act/tc_bpf.h> 23 24#define BPF_TAB_MASK 15 25#define ACT_BPF_NAME_LEN 256 26 27struct tcf_bpf_cfg { 28 struct bpf_prog *filter; 29 struct sock_filter *bpf_ops; 30 const char *bpf_name; 31 u32 bpf_fd; 32 u16 bpf_num_ops; 33 bool is_ebpf; 34}; 35 36static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, 37 struct tcf_result *res) 38{ 39 struct tcf_bpf *prog = act->priv; 40 int action, filter_res; 41 42 if (unlikely(!skb_mac_header_was_set(skb))) 43 return TC_ACT_UNSPEC; 44 45 spin_lock(&prog->tcf_lock); 46 47 prog->tcf_tm.lastuse = jiffies; 48 bstats_update(&prog->tcf_bstats, skb); 49 50 /* Needed here for accessing maps. */ 51 rcu_read_lock(); 52 filter_res = BPF_PROG_RUN(prog->filter, skb); 53 rcu_read_unlock(); 54 55 /* A BPF program may overwrite the default action opcode. 56 * Similarly as in cls_bpf, if filter_res == -1 we use the 57 * default action specified from tc. 58 * 59 * In case a different well-known TC_ACT opcode has been 60 * returned, it will overwrite the default one. 61 * 62 * For everything else that is unkown, TC_ACT_UNSPEC is 63 * returned. 64 */ 65 switch (filter_res) { 66 case TC_ACT_PIPE: 67 case TC_ACT_RECLASSIFY: 68 case TC_ACT_OK: 69 action = filter_res; 70 break; 71 case TC_ACT_SHOT: 72 action = filter_res; 73 prog->tcf_qstats.drops++; 74 break; 75 case TC_ACT_UNSPEC: 76 action = prog->tcf_action; 77 break; 78 default: 79 action = TC_ACT_UNSPEC; 80 break; 81 } 82 83 spin_unlock(&prog->tcf_lock); 84 return action; 85} 86 87static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog) 88{ 89 return !prog->bpf_ops; 90} 91 92static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog, 93 struct sk_buff *skb) 94{ 95 struct nlattr *nla; 96 97 if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops)) 98 return -EMSGSIZE; 99 100 nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops * 101 sizeof(struct sock_filter)); 102 if (nla == NULL) 103 return -EMSGSIZE; 104 105 memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla)); 106 107 return 0; 108} 109 110static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog, 111 struct sk_buff *skb) 112{ 113 if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd)) 114 return -EMSGSIZE; 115 116 if (prog->bpf_name && 117 nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name)) 118 return -EMSGSIZE; 119 120 return 0; 121} 122 123static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act, 124 int bind, int ref) 125{ 126 unsigned char *tp = skb_tail_pointer(skb); 127 struct tcf_bpf *prog = act->priv; 128 struct tc_act_bpf opt = { 129 .index = prog->tcf_index, 130 .refcnt = prog->tcf_refcnt - ref, 131 .bindcnt = prog->tcf_bindcnt - bind, 132 .action = prog->tcf_action, 133 }; 134 struct tcf_t tm; 135 int ret; 136 137 if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) 138 goto nla_put_failure; 139 140 if (tcf_bpf_is_ebpf(prog)) 141 ret = tcf_bpf_dump_ebpf_info(prog, skb); 142 else 143 ret = tcf_bpf_dump_bpf_info(prog, skb); 144 if (ret) 145 goto nla_put_failure; 146 147 tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install); 148 tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse); 149 tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires); 150 151 if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm)) 152 goto nla_put_failure; 153 154 return skb->len; 155 156nla_put_failure: 157 nlmsg_trim(skb, tp); 158 return -1; 159} 160 161static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { 162 [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, 163 [TCA_ACT_BPF_FD] = { .type = NLA_U32 }, 164 [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN }, 165 [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, 166 [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, 167 .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, 168}; 169 170static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) 171{ 172 struct sock_filter *bpf_ops; 173 struct sock_fprog_kern fprog_tmp; 174 struct bpf_prog *fp; 175 u16 bpf_size, bpf_num_ops; 176 int ret; 177 178 bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); 179 if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) 180 return -EINVAL; 181 182 bpf_size = bpf_num_ops * sizeof(*bpf_ops); 183 if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS])) 184 return -EINVAL; 185 186 bpf_ops = kzalloc(bpf_size, GFP_KERNEL); 187 if (bpf_ops == NULL) 188 return -ENOMEM; 189 190 memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); 191 192 fprog_tmp.len = bpf_num_ops; 193 fprog_tmp.filter = bpf_ops; 194 195 ret = bpf_prog_create(&fp, &fprog_tmp); 196 if (ret < 0) { 197 kfree(bpf_ops); 198 return ret; 199 } 200 201 cfg->bpf_ops = bpf_ops; 202 cfg->bpf_num_ops = bpf_num_ops; 203 cfg->filter = fp; 204 cfg->is_ebpf = false; 205 206 return 0; 207} 208 209static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) 210{ 211 struct bpf_prog *fp; 212 char *name = NULL; 213 u32 bpf_fd; 214 215 bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); 216 217 fp = bpf_prog_get(bpf_fd); 218 if (IS_ERR(fp)) 219 return PTR_ERR(fp); 220 221 if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { 222 bpf_prog_put(fp); 223 return -EINVAL; 224 } 225 226 if (tb[TCA_ACT_BPF_NAME]) { 227 name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), 228 nla_len(tb[TCA_ACT_BPF_NAME]), 229 GFP_KERNEL); 230 if (!name) { 231 bpf_prog_put(fp); 232 return -ENOMEM; 233 } 234 } 235 236 cfg->bpf_fd = bpf_fd; 237 cfg->bpf_name = name; 238 cfg->filter = fp; 239 cfg->is_ebpf = true; 240 241 return 0; 242} 243 244static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) 245{ 246 if (cfg->is_ebpf) 247 bpf_prog_put(cfg->filter); 248 else 249 bpf_prog_destroy(cfg->filter); 250 251 kfree(cfg->bpf_ops); 252 kfree(cfg->bpf_name); 253} 254 255static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, 256 struct tcf_bpf_cfg *cfg) 257{ 258 cfg->is_ebpf = tcf_bpf_is_ebpf(prog); 259 cfg->filter = prog->filter; 260 261 cfg->bpf_ops = prog->bpf_ops; 262 cfg->bpf_name = prog->bpf_name; 263} 264 265static int tcf_bpf_init(struct net *net, struct nlattr *nla, 266 struct nlattr *est, struct tc_action *act, 267 int replace, int bind) 268{ 269 struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; 270 struct tcf_bpf_cfg cfg, old; 271 struct tc_act_bpf *parm; 272 struct tcf_bpf *prog; 273 bool is_bpf, is_ebpf; 274 int ret; 275 276 if (!nla) 277 return -EINVAL; 278 279 ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); 280 if (ret < 0) 281 return ret; 282 283 is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; 284 is_ebpf = tb[TCA_ACT_BPF_FD]; 285 286 if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || 287 !tb[TCA_ACT_BPF_PARMS]) 288 return -EINVAL; 289 290 parm = nla_data(tb[TCA_ACT_BPF_PARMS]); 291 292 memset(&cfg, 0, sizeof(cfg)); 293 294 ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : 295 tcf_bpf_init_from_efd(tb, &cfg); 296 if (ret < 0) 297 return ret; 298 299 if (!tcf_hash_check(parm->index, act, bind)) { 300 ret = tcf_hash_create(parm->index, est, act, 301 sizeof(*prog), bind); 302 if (ret < 0) 303 goto destroy_fp; 304 305 ret = ACT_P_CREATED; 306 } else { 307 /* Don't override defaults. */ 308 if (bind) 309 goto destroy_fp; 310 311 tcf_hash_release(act, bind); 312 if (!replace) { 313 ret = -EEXIST; 314 goto destroy_fp; 315 } 316 } 317 318 prog = to_bpf(act); 319 spin_lock_bh(&prog->tcf_lock); 320 321 if (ret != ACT_P_CREATED) 322 tcf_bpf_prog_fill_cfg(prog, &old); 323 324 prog->bpf_ops = cfg.bpf_ops; 325 prog->bpf_name = cfg.bpf_name; 326 327 if (cfg.bpf_num_ops) 328 prog->bpf_num_ops = cfg.bpf_num_ops; 329 if (cfg.bpf_fd) 330 prog->bpf_fd = cfg.bpf_fd; 331 332 prog->tcf_action = parm->action; 333 prog->filter = cfg.filter; 334 335 spin_unlock_bh(&prog->tcf_lock); 336 337 if (ret == ACT_P_CREATED) 338 tcf_hash_insert(act); 339 else 340 tcf_bpf_cfg_cleanup(&old); 341 342 return ret; 343 344destroy_fp: 345 tcf_bpf_cfg_cleanup(&cfg); 346 return ret; 347} 348 349static void tcf_bpf_cleanup(struct tc_action *act, int bind) 350{ 351 struct tcf_bpf_cfg tmp; 352 353 tcf_bpf_prog_fill_cfg(act->priv, &tmp); 354 tcf_bpf_cfg_cleanup(&tmp); 355} 356 357static struct tc_action_ops act_bpf_ops __read_mostly = { 358 .kind = "bpf", 359 .type = TCA_ACT_BPF, 360 .owner = THIS_MODULE, 361 .act = tcf_bpf, 362 .dump = tcf_bpf_dump, 363 .cleanup = tcf_bpf_cleanup, 364 .init = tcf_bpf_init, 365}; 366 367static int __init bpf_init_module(void) 368{ 369 return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); 370} 371 372static void __exit bpf_cleanup_module(void) 373{ 374 tcf_unregister_action(&act_bpf_ops); 375} 376 377module_init(bpf_init_module); 378module_exit(bpf_cleanup_module); 379 380MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>"); 381MODULE_DESCRIPTION("TC BPF based action"); 382MODULE_LICENSE("GPL v2"); 383