root/net/openvswitch/vport.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ovs_vport_init
  2. ovs_vport_exit
  3. hash_bucket
  4. __ovs_vport_ops_register
  5. ovs_vport_ops_unregister
  6. ovs_vport_locate
  7. ovs_vport_alloc
  8. ovs_vport_free
  9. ovs_vport_lookup
  10. ovs_vport_add
  11. ovs_vport_set_options
  12. ovs_vport_del
  13. ovs_vport_get_stats
  14. ovs_vport_get_options
  15. ovs_vport_set_upcall_portids
  16. ovs_vport_get_upcall_portids
  17. ovs_vport_find_upcall_portid
  18. ovs_vport_receive
  19. packet_length
  20. ovs_vport_send

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (c) 2007-2014 Nicira, Inc.
   4  */
   5 
   6 #include <linux/etherdevice.h>
   7 #include <linux/if.h>
   8 #include <linux/if_vlan.h>
   9 #include <linux/jhash.h>
  10 #include <linux/kernel.h>
  11 #include <linux/list.h>
  12 #include <linux/mutex.h>
  13 #include <linux/percpu.h>
  14 #include <linux/rcupdate.h>
  15 #include <linux/rtnetlink.h>
  16 #include <linux/compat.h>
  17 #include <net/net_namespace.h>
  18 #include <linux/module.h>
  19 
  20 #include "datapath.h"
  21 #include "vport.h"
  22 #include "vport-internal_dev.h"
  23 
  24 static LIST_HEAD(vport_ops_list);
  25 
  26 /* Protected by RCU read lock for reading, ovs_mutex for writing. */
  27 static struct hlist_head *dev_table;
  28 #define VPORT_HASH_BUCKETS 1024
  29 
  30 /**
  31  *      ovs_vport_init - initialize vport subsystem
  32  *
  33  * Called at module load time to initialize the vport subsystem.
  34  */
  35 int ovs_vport_init(void)
  36 {
  37         dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
  38                             GFP_KERNEL);
  39         if (!dev_table)
  40                 return -ENOMEM;
  41 
  42         return 0;
  43 }
  44 
  45 /**
  46  *      ovs_vport_exit - shutdown vport subsystem
  47  *
  48  * Called at module exit time to shutdown the vport subsystem.
  49  */
  50 void ovs_vport_exit(void)
  51 {
  52         kfree(dev_table);
  53 }
  54 
  55 static struct hlist_head *hash_bucket(const struct net *net, const char *name)
  56 {
  57         unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
  58         return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
  59 }
  60 
  61 int __ovs_vport_ops_register(struct vport_ops *ops)
  62 {
  63         int err = -EEXIST;
  64         struct vport_ops *o;
  65 
  66         ovs_lock();
  67         list_for_each_entry(o, &vport_ops_list, list)
  68                 if (ops->type == o->type)
  69                         goto errout;
  70 
  71         list_add_tail(&ops->list, &vport_ops_list);
  72         err = 0;
  73 errout:
  74         ovs_unlock();
  75         return err;
  76 }
  77 EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
  78 
  79 void ovs_vport_ops_unregister(struct vport_ops *ops)
  80 {
  81         ovs_lock();
  82         list_del(&ops->list);
  83         ovs_unlock();
  84 }
  85 EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
  86 
  87 /**
  88  *      ovs_vport_locate - find a port that has already been created
  89  *
  90  * @name: name of port to find
  91  *
  92  * Must be called with ovs or RCU read lock.
  93  */
  94 struct vport *ovs_vport_locate(const struct net *net, const char *name)
  95 {
  96         struct hlist_head *bucket = hash_bucket(net, name);
  97         struct vport *vport;
  98 
  99         hlist_for_each_entry_rcu(vport, bucket, hash_node)
 100                 if (!strcmp(name, ovs_vport_name(vport)) &&
 101                     net_eq(ovs_dp_get_net(vport->dp), net))
 102                         return vport;
 103 
 104         return NULL;
 105 }
 106 
 107 /**
 108  *      ovs_vport_alloc - allocate and initialize new vport
 109  *
 110  * @priv_size: Size of private data area to allocate.
 111  * @ops: vport device ops
 112  *
 113  * Allocate and initialize a new vport defined by @ops.  The vport will contain
 114  * a private data area of size @priv_size that can be accessed using
 115  * vport_priv().  vports that are no longer needed should be released with
 116  * vport_free().
 117  */
 118 struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 119                           const struct vport_parms *parms)
 120 {
 121         struct vport *vport;
 122         size_t alloc_size;
 123 
 124         alloc_size = sizeof(struct vport);
 125         if (priv_size) {
 126                 alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
 127                 alloc_size += priv_size;
 128         }
 129 
 130         vport = kzalloc(alloc_size, GFP_KERNEL);
 131         if (!vport)
 132                 return ERR_PTR(-ENOMEM);
 133 
 134         vport->dp = parms->dp;
 135         vport->port_no = parms->port_no;
 136         vport->ops = ops;
 137         INIT_HLIST_NODE(&vport->dp_hash_node);
 138 
 139         if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
 140                 kfree(vport);
 141                 return ERR_PTR(-EINVAL);
 142         }
 143 
 144         return vport;
 145 }
 146 EXPORT_SYMBOL_GPL(ovs_vport_alloc);
 147 
 148 /**
 149  *      ovs_vport_free - uninitialize and free vport
 150  *
 151  * @vport: vport to free
 152  *
 153  * Frees a vport allocated with vport_alloc() when it is no longer needed.
 154  *
 155  * The caller must ensure that an RCU grace period has passed since the last
 156  * time @vport was in a datapath.
 157  */
 158 void ovs_vport_free(struct vport *vport)
 159 {
 160         /* vport is freed from RCU callback or error path, Therefore
 161          * it is safe to use raw dereference.
 162          */
 163         kfree(rcu_dereference_raw(vport->upcall_portids));
 164         kfree(vport);
 165 }
 166 EXPORT_SYMBOL_GPL(ovs_vport_free);
 167 
 168 static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
 169 {
 170         struct vport_ops *ops;
 171 
 172         list_for_each_entry(ops, &vport_ops_list, list)
 173                 if (ops->type == parms->type)
 174                         return ops;
 175 
 176         return NULL;
 177 }
 178 
 179 /**
 180  *      ovs_vport_add - add vport device (for kernel callers)
 181  *
 182  * @parms: Information about new vport.
 183  *
 184  * Creates a new vport with the specified configuration (which is dependent on
 185  * device type).  ovs_mutex must be held.
 186  */
 187 struct vport *ovs_vport_add(const struct vport_parms *parms)
 188 {
 189         struct vport_ops *ops;
 190         struct vport *vport;
 191 
 192         ops = ovs_vport_lookup(parms);
 193         if (ops) {
 194                 struct hlist_head *bucket;
 195 
 196                 if (!try_module_get(ops->owner))
 197                         return ERR_PTR(-EAFNOSUPPORT);
 198 
 199                 vport = ops->create(parms);
 200                 if (IS_ERR(vport)) {
 201                         module_put(ops->owner);
 202                         return vport;
 203                 }
 204 
 205                 bucket = hash_bucket(ovs_dp_get_net(vport->dp),
 206                                      ovs_vport_name(vport));
 207                 hlist_add_head_rcu(&vport->hash_node, bucket);
 208                 return vport;
 209         }
 210 
 211         /* Unlock to attempt module load and return -EAGAIN if load
 212          * was successful as we need to restart the port addition
 213          * workflow.
 214          */
 215         ovs_unlock();
 216         request_module("vport-type-%d", parms->type);
 217         ovs_lock();
 218 
 219         if (!ovs_vport_lookup(parms))
 220                 return ERR_PTR(-EAFNOSUPPORT);
 221         else
 222                 return ERR_PTR(-EAGAIN);
 223 }
 224 
 225 /**
 226  *      ovs_vport_set_options - modify existing vport device (for kernel callers)
 227  *
 228  * @vport: vport to modify.
 229  * @options: New configuration.
 230  *
 231  * Modifies an existing device with the specified configuration (which is
 232  * dependent on device type).  ovs_mutex must be held.
 233  */
 234 int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
 235 {
 236         if (!vport->ops->set_options)
 237                 return -EOPNOTSUPP;
 238         return vport->ops->set_options(vport, options);
 239 }
 240 
 241 /**
 242  *      ovs_vport_del - delete existing vport device
 243  *
 244  * @vport: vport to delete.
 245  *
 246  * Detaches @vport from its datapath and destroys it.  ovs_mutex must
 247  * be held.
 248  */
 249 void ovs_vport_del(struct vport *vport)
 250 {
 251         hlist_del_rcu(&vport->hash_node);
 252         module_put(vport->ops->owner);
 253         vport->ops->destroy(vport);
 254 }
 255 
 256 /**
 257  *      ovs_vport_get_stats - retrieve device stats
 258  *
 259  * @vport: vport from which to retrieve the stats
 260  * @stats: location to store stats
 261  *
 262  * Retrieves transmit, receive, and error stats for the given device.
 263  *
 264  * Must be called with ovs_mutex or rcu_read_lock.
 265  */
 266 void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
 267 {
 268         const struct rtnl_link_stats64 *dev_stats;
 269         struct rtnl_link_stats64 temp;
 270 
 271         dev_stats = dev_get_stats(vport->dev, &temp);
 272         stats->rx_errors  = dev_stats->rx_errors;
 273         stats->tx_errors  = dev_stats->tx_errors;
 274         stats->tx_dropped = dev_stats->tx_dropped;
 275         stats->rx_dropped = dev_stats->rx_dropped;
 276 
 277         stats->rx_bytes   = dev_stats->rx_bytes;
 278         stats->rx_packets = dev_stats->rx_packets;
 279         stats->tx_bytes   = dev_stats->tx_bytes;
 280         stats->tx_packets = dev_stats->tx_packets;
 281 }
 282 
 283 /**
 284  *      ovs_vport_get_options - retrieve device options
 285  *
 286  * @vport: vport from which to retrieve the options.
 287  * @skb: sk_buff where options should be appended.
 288  *
 289  * Retrieves the configuration of the given device, appending an
 290  * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
 291  * vport-specific attributes to @skb.
 292  *
 293  * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
 294  * negative error code if a real error occurred.  If an error occurs, @skb is
 295  * left unmodified.
 296  *
 297  * Must be called with ovs_mutex or rcu_read_lock.
 298  */
 299 int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
 300 {
 301         struct nlattr *nla;
 302         int err;
 303 
 304         if (!vport->ops->get_options)
 305                 return 0;
 306 
 307         nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS);
 308         if (!nla)
 309                 return -EMSGSIZE;
 310 
 311         err = vport->ops->get_options(vport, skb);
 312         if (err) {
 313                 nla_nest_cancel(skb, nla);
 314                 return err;
 315         }
 316 
 317         nla_nest_end(skb, nla);
 318         return 0;
 319 }
 320 
 321 /**
 322  *      ovs_vport_set_upcall_portids - set upcall portids of @vport.
 323  *
 324  * @vport: vport to modify.
 325  * @ids: new configuration, an array of port ids.
 326  *
 327  * Sets the vport's upcall_portids to @ids.
 328  *
 329  * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
 330  * as an array of U32.
 331  *
 332  * Must be called with ovs_mutex.
 333  */
 334 int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
 335 {
 336         struct vport_portids *old, *vport_portids;
 337 
 338         if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
 339                 return -EINVAL;
 340 
 341         old = ovsl_dereference(vport->upcall_portids);
 342 
 343         vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
 344                                 GFP_KERNEL);
 345         if (!vport_portids)
 346                 return -ENOMEM;
 347 
 348         vport_portids->n_ids = nla_len(ids) / sizeof(u32);
 349         vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
 350         nla_memcpy(vport_portids->ids, ids, nla_len(ids));
 351 
 352         rcu_assign_pointer(vport->upcall_portids, vport_portids);
 353 
 354         if (old)
 355                 kfree_rcu(old, rcu);
 356         return 0;
 357 }
 358 
 359 /**
 360  *      ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
 361  *
 362  * @vport: vport from which to retrieve the portids.
 363  * @skb: sk_buff where portids should be appended.
 364  *
 365  * Retrieves the configuration of the given vport, appending the
 366  * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
 367  * portids to @skb.
 368  *
 369  * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
 370  * If an error occurs, @skb is left unmodified.  Must be called with
 371  * ovs_mutex or rcu_read_lock.
 372  */
 373 int ovs_vport_get_upcall_portids(const struct vport *vport,
 374                                  struct sk_buff *skb)
 375 {
 376         struct vport_portids *ids;
 377 
 378         ids = rcu_dereference_ovsl(vport->upcall_portids);
 379 
 380         if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
 381                 return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
 382                                ids->n_ids * sizeof(u32), (void *)ids->ids);
 383         else
 384                 return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
 385 }
 386 
 387 /**
 388  *      ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
 389  *
 390  * @vport: vport from which the missed packet is received.
 391  * @skb: skb that the missed packet was received.
 392  *
 393  * Uses the skb_get_hash() to select the upcall portid to send the
 394  * upcall.
 395  *
 396  * Returns the portid of the target socket.  Must be called with rcu_read_lock.
 397  */
 398 u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
 399 {
 400         struct vport_portids *ids;
 401         u32 ids_index;
 402         u32 hash;
 403 
 404         ids = rcu_dereference(vport->upcall_portids);
 405 
 406         if (ids->n_ids == 1 && ids->ids[0] == 0)
 407                 return 0;
 408 
 409         hash = skb_get_hash(skb);
 410         ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
 411         return ids->ids[ids_index];
 412 }
 413 
 414 /**
 415  *      ovs_vport_receive - pass up received packet to the datapath for processing
 416  *
 417  * @vport: vport that received the packet
 418  * @skb: skb that was received
 419  * @tun_key: tunnel (if any) that carried packet
 420  *
 421  * Must be called with rcu_read_lock.  The packet cannot be shared and
 422  * skb->data should point to the Ethernet header.
 423  */
 424 int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 425                       const struct ip_tunnel_info *tun_info)
 426 {
 427         struct sw_flow_key key;
 428         int error;
 429 
 430         OVS_CB(skb)->input_vport = vport;
 431         OVS_CB(skb)->mru = 0;
 432         OVS_CB(skb)->cutlen = 0;
 433         if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
 434                 u32 mark;
 435 
 436                 mark = skb->mark;
 437                 skb_scrub_packet(skb, true);
 438                 skb->mark = mark;
 439                 tun_info = NULL;
 440         }
 441 
 442         /* Extract flow from 'skb' into 'key'. */
 443         error = ovs_flow_key_extract(tun_info, skb, &key);
 444         if (unlikely(error)) {
 445                 kfree_skb(skb);
 446                 return error;
 447         }
 448         ovs_dp_process_packet(skb, &key);
 449         return 0;
 450 }
 451 
 452 static int packet_length(const struct sk_buff *skb,
 453                          struct net_device *dev)
 454 {
 455         int length = skb->len - dev->hard_header_len;
 456 
 457         if (!skb_vlan_tag_present(skb) &&
 458             eth_type_vlan(skb->protocol))
 459                 length -= VLAN_HLEN;
 460 
 461         /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
 462          * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
 463          * account for 802.1ad. e.g. is_skb_forwardable().
 464          */
 465 
 466         return length > 0 ? length : 0;
 467 }
 468 
 469 void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 470 {
 471         int mtu = vport->dev->mtu;
 472 
 473         switch (vport->dev->type) {
 474         case ARPHRD_NONE:
 475                 if (mac_proto == MAC_PROTO_ETHERNET) {
 476                         skb_reset_network_header(skb);
 477                         skb_reset_mac_len(skb);
 478                         skb->protocol = htons(ETH_P_TEB);
 479                 } else if (mac_proto != MAC_PROTO_NONE) {
 480                         WARN_ON_ONCE(1);
 481                         goto drop;
 482                 }
 483                 break;
 484         case ARPHRD_ETHER:
 485                 if (mac_proto != MAC_PROTO_ETHERNET)
 486                         goto drop;
 487                 break;
 488         default:
 489                 goto drop;
 490         }
 491 
 492         if (unlikely(packet_length(skb, vport->dev) > mtu &&
 493                      !skb_is_gso(skb))) {
 494                 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
 495                                      vport->dev->name,
 496                                      packet_length(skb, vport->dev), mtu);
 497                 vport->dev->stats.tx_errors++;
 498                 goto drop;
 499         }
 500 
 501         skb->dev = vport->dev;
 502         vport->ops->send(skb);
 503         return;
 504 
 505 drop:
 506         kfree_skb(skb);
 507 }

/* [<][>][^][v][top][bottom][index][help] */