This source file includes the following definitions.
- packet_direct_xmit
- packet_cached_dev_get
- packet_cached_dev_assign
- packet_cached_dev_reset
- packet_use_direct_xmit
- packet_pick_tx_queue
- __register_prot_hook
- register_prot_hook
- __unregister_prot_hook
- unregister_prot_hook
- pgv_to_page
- __packet_set_status
- __packet_get_status
- tpacket_get_timestamp
- __packet_set_timestamp
- packet_lookup_frame
- packet_current_frame
- prb_del_retire_blk_timer
- prb_shutdown_retire_blk_timer
- prb_setup_retire_blk_timer
- prb_calc_retire_blk_tmo
- prb_init_ft_ops
- init_prb_bdqc
- _prb_refresh_rx_retire_blk_timer
- prb_retire_rx_blk_timer_expired
- prb_flush_block
- prb_close_block
- prb_thaw_queue
- prb_open_block
- prb_freeze_queue
- prb_dispatch_next_block
- prb_retire_current_block
- prb_curr_blk_in_use
- prb_queue_frozen
- prb_clear_blk_fill_status
- prb_fill_rxhash
- prb_clear_rxhash
- prb_fill_vlan_info
- prb_run_all_ft_ops
- prb_fill_curr_block
- __packet_lookup_frame_in_block
- packet_current_rx_frame
- prb_lookup_block
- prb_previous_blk_num
- __prb_previous_block
- packet_previous_rx_frame
- packet_increment_rx_head
- packet_previous_frame
- packet_increment_head
- packet_inc_pending
- packet_dec_pending
- packet_read_pending
- packet_alloc_pending
- packet_free_pending
- __tpacket_has_room
- __tpacket_v3_has_room
- __packet_rcv_has_room
- packet_rcv_has_room
- packet_rcv_try_clear_pressure
- packet_sock_destruct
- fanout_flow_is_huge
- fanout_demux_hash
- fanout_demux_lb
- fanout_demux_cpu
- fanout_demux_rnd
- fanout_demux_rollover
- fanout_demux_qm
- fanout_demux_bpf
- fanout_has_flag
- packet_rcv_fanout
- __fanout_link
- __fanout_unlink
- match_fanout_group
- fanout_init_data
- __fanout_set_data_bpf
- fanout_set_data_cbpf
- fanout_set_data_ebpf
- fanout_set_data
- fanout_release_data
- __fanout_id_is_free
- fanout_find_new_id
- fanout_add
- fanout_release
- packet_extra_vlan_len_allowed
- packet_rcv_spkt
- packet_parse_headers
- packet_sendmsg_spkt
- run_filter
- packet_rcv_vnet
- packet_rcv
- tpacket_rcv
- tpacket_destruct_skb
- __packet_snd_vnet_parse
- packet_snd_vnet_parse
- tpacket_fill_skb
- tpacket_parse_header
- tpacket_snd
- packet_alloc_skb
- packet_snd
- packet_sendmsg
- packet_release
- packet_do_bind
- packet_bind_spkt
- packet_bind
- packet_create
- packet_recvmsg
- packet_getname_spkt
- packet_getname
- packet_dev_mc
- packet_dev_mclist_delete
- packet_mc_add
- packet_mc_drop
- packet_flush_mclist
- packet_setsockopt
- packet_getsockopt
- compat_packet_setsockopt
- packet_notifier
- packet_ioctl
- packet_poll
- packet_mm_open
- packet_mm_close
- free_pg_vec
- alloc_one_pg_vec_page
- alloc_pg_vec
- packet_set_ring
- packet_mmap
- packet_seq_start
- packet_seq_next
- packet_seq_stop
- packet_seq_show
- packet_net_init
- packet_net_exit
- packet_exit
- packet_init
   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4  *              operating system.  INET is implemented using the BSD Socket
   5  *              interface as the primary means of communication with the user
   6  *              level.
   7  *
   8  *              PACKET - implements raw packet sockets.
   9  *
  10  * Authors:     Ross Biro
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  */
  49 #include <linux/types.h>
  50 #include <linux/mm.h>
  51 #include <linux/capability.h>
  52 #include <linux/fcntl.h>
  53 #include <linux/socket.h>
  54 #include <linux/in.h>
  55 #include <linux/inet.h>
  56 #include <linux/netdevice.h>
  57 #include <linux/if_packet.h>
  58 #include <linux/wireless.h>
  59 #include <linux/kernel.h>
  60 #include <linux/kmod.h>
  61 #include <linux/slab.h>
  62 #include <linux/vmalloc.h>
  63 #include <net/net_namespace.h>
  64 #include <net/ip.h>
  65 #include <net/protocol.h>
  66 #include <linux/skbuff.h>
  67 #include <net/sock.h>
  68 #include <linux/errno.h>
  69 #include <linux/timer.h>
  70 #include <linux/uaccess.h>
  71 #include <asm/ioctls.h>
  72 #include <asm/page.h>
  73 #include <asm/cacheflush.h>
  74 #include <asm/io.h>
  75 #include <linux/proc_fs.h>
  76 #include <linux/seq_file.h>
  77 #include <linux/poll.h>
  78 #include <linux/module.h>
  79 #include <linux/init.h>
  80 #include <linux/mutex.h>
  81 #include <linux/if_vlan.h>
  82 #include <linux/virtio_net.h>
  83 #include <linux/errqueue.h>
  84 #include <linux/net_tstamp.h>
  85 #include <linux/percpu.h>
  86 #ifdef CONFIG_INET
  87 #include <net/inet_common.h>
  88 #endif
  89 #include <linux/bpf.h>
  90 #include <net/compat.h>
  91 
  92 #include "internal.h"
  93 
  94 /*
  95    Assumptions:
  96    - if the device has no dev->hard_header routine, it adds and removes
  97      the ll header inside itself. In this case the ll header is invisible
  98      outside of the device, but higher levels still should reserve
  99      dev->hard_header_len. Some devices are clever enough to reallocate
 100      the skb when the header does not fit into the reserved space (tunnel),
 101      others are not (PPP).
 102    - packet sockets receive packets with the ll header pulled, so
 103      SOCK_RAW should push it back.
 104 
 105 On receive:
 106 -----------
 107 
 108 Incoming, dev->hard_header != NULL
 109    mac_header -> ll header
 110    data       -> data
 111 
 112 Outgoing, dev->hard_header != NULL
 113    mac_header -> ll header
 114    data       -> ll header
 115 
 116 Incoming, dev->hard_header == NULL
 117    mac_header -> UNKNOWN position. It is very likely that it points to the
 118                  ll header. PPP does this, which is wrong, because it
 119                  introduces asymmetry between rx and tx paths.
 120    data       -> data
 121 
 122 Outgoing, dev->hard_header == NULL
 123    mac_header -> data. The ll header is still not built!
 124    data       -> data
 125 
 126 Resume
 127   If dev->hard_header == NULL we are unlikely to restore a sensible ll header.
 128 
 129 On transmit:
 130 ------------
 131 
 132 dev->hard_header != NULL
 133    mac_header -> ll header
 134    data       -> ll header
 135 
 136 dev->hard_header == NULL (the ll header is added by the device,
 137                           we cannot control it)
 138    mac_header -> data
 139    data       -> data
 140 
 141    We should set the network header on output to the correct position;
 142    the packet classifier depends on it.
 143  */
 150 struct packet_mreq_max {
 151         int             mr_ifindex;
 152         unsigned short  mr_type;
 153         unsigned short  mr_alen;
 154         unsigned char   mr_address[MAX_ADDR_LEN];
 155 };
 156 
 157 union tpacket_uhdr {
 158         struct tpacket_hdr  *h1;
 159         struct tpacket2_hdr *h2;
 160         struct tpacket3_hdr *h3;
 161         void *raw;
 162 };
 163 
 164 static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 165                 int closing, int tx_ring);
 166 
 167 #define V3_ALIGNMENT    (8)
 168 
 169 #define BLK_HDR_LEN     (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
 170 
 171 #define BLK_PLUS_PRIV(sz_of_priv) \
 172         (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
 173 
 174 #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
 175 #define BLOCK_NUM_PKTS(x)       ((x)->hdr.bh1.num_pkts)
 176 #define BLOCK_O2FP(x)           ((x)->hdr.bh1.offset_to_first_pkt)
 177 #define BLOCK_LEN(x)            ((x)->hdr.bh1.blk_len)
 178 #define BLOCK_SNUM(x)           ((x)->hdr.bh1.seq_num)
 179 #define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
 180 #define BLOCK_PRIV(x)           ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
 181 
 182 struct packet_sock;
 183 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 184                        struct packet_type *pt, struct net_device *orig_dev);
 185 
 186 static void *packet_previous_frame(struct packet_sock *po,
 187                 struct packet_ring_buffer *rb,
 188                 int status);
 189 static void packet_increment_head(struct packet_ring_buffer *buff);
 190 static int prb_curr_blk_in_use(struct tpacket_block_desc *);
 191 static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
 192                         struct packet_sock *);
 193 static void prb_retire_current_block(struct tpacket_kbdq_core *,
 194                 struct packet_sock *, unsigned int status);
 195 static int prb_queue_frozen(struct tpacket_kbdq_core *);
 196 static void prb_open_block(struct tpacket_kbdq_core *,
 197                 struct tpacket_block_desc *);
 198 static void prb_retire_rx_blk_timer_expired(struct timer_list *);
 199 static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
 200 static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
 201 static void prb_clear_rxhash(struct tpacket_kbdq_core *,
 202                 struct tpacket3_hdr *);
 203 static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
 204                 struct tpacket3_hdr *);
 205 static void packet_flush_mclist(struct sock *sk);
 206 static u16 packet_pick_tx_queue(struct sk_buff *skb);
 207 
 208 struct packet_skb_cb {
 209         union {
 210                 struct sockaddr_pkt pkt;
 211                 union {
 212                         /* Trick: alias skb original length with
 213                          * ll.sll_family and ll.protocol in order
 214                          * to save room.
 215                          */
 216                         unsigned int origlen;
 217                         struct sockaddr_ll ll;
 218                 };
 219         } sa;
 220 };
 221 
 222 #define vio_le() virtio_legacy_is_little_endian()
 223 
 224 #define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
 225 
 226 #define GET_PBDQC_FROM_RB(x)    ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
 227 #define GET_PBLOCK_DESC(x, bid) \
 228         ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
 229 #define GET_CURR_PBLOCK_DESC_FROM_CORE(x)       \
 230         ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
 231 #define GET_NEXT_PRB_BLK_NUM(x) \
 232         (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
 233         ((x)->kactive_blk_num+1) : 0)
 234 
 235 static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
 236 static void __fanout_link(struct sock *sk, struct packet_sock *po);
 237 
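     /* Direct transmit path used with PACKET_QDISC_BYPASS: hand the skb
      * straight to the device queue picked below, bypassing the qdisc layer.
      */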
 238 static int packet_direct_xmit(struct sk_buff *skb)
 239 {
 240         return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
 241 }
 242 
 243 static struct net_device *packet_cached_dev_get(struct packet_sock *po)
 244 {
 245         struct net_device *dev;
 246 
 247         rcu_read_lock();
 248         dev = rcu_dereference(po->cached_dev);
 249         if (likely(dev))
 250                 dev_hold(dev);
 251         rcu_read_unlock();
 252 
 253         return dev;
 254 }
 255 
 256 static void packet_cached_dev_assign(struct packet_sock *po,
 257                                      struct net_device *dev)
 258 {
 259         rcu_assign_pointer(po->cached_dev, dev);
 260 }
 261 
 262 static void packet_cached_dev_reset(struct packet_sock *po)
 263 {
 264         RCU_INIT_POINTER(po->cached_dev, NULL);
 265 }
 266 
 267 static bool packet_use_direct_xmit(const struct packet_sock *po)
 268 {
 269         return po->xmit == packet_direct_xmit;
 270 }
 271 
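     /* Mirror the stack's queue selection: honour the driver's
      * ndo_select_queue() when present, otherwise fall back to
      * netdev_pick_tx(), capping the result to the real queue count.
      */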
 272 static u16 packet_pick_tx_queue(struct sk_buff *skb)
 273 {
 274         struct net_device *dev = skb->dev;
 275         const struct net_device_ops *ops = dev->netdev_ops;
 276         int cpu = raw_smp_processor_id();
 277         u16 queue_index;
 278 
 279 #ifdef CONFIG_XPS
 280         skb->sender_cpu = cpu + 1;
 281 #endif
 282         skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
 283         if (ops->ndo_select_queue) {
 284                 queue_index = ops->ndo_select_queue(dev, skb, NULL);
 285                 queue_index = netdev_cap_txqueue(dev, queue_index);
 286         } else {
 287                 queue_index = netdev_pick_tx(dev, skb, NULL);
 288         }
 289 
 290         return queue_index;
 291 }
 292 
 293 /* __register_prot_hook must be invoked through register_prot_hook
 294  * or from a context in which asynchronous accesses to the packet
 295  * socket is not possible (packet_create()).
 296  */
 297 static void __register_prot_hook(struct sock *sk)
 298 {
 299         struct packet_sock *po = pkt_sk(sk);
 300 
 301         if (!po->running) {
 302                 if (po->fanout)
 303                         __fanout_link(sk, po);
 304                 else
 305                         dev_add_pack(&po->prot_hook);
 306 
 307                 sock_hold(sk);
 308                 po->running = 1;
 309         }
 310 }
 311 
 312 static void register_prot_hook(struct sock *sk)
 313 {
 314         lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
 315         __register_prot_hook(sk);
 316 }
 317 
 318 /* If the sync parameter is true, we will temporarily drop
 319  * the po->bind_lock and do a synchronize_net to make sure no
 320  * asynchronous packet processing paths still refer to the elements
 321  * of po->prot_hook.  If the sync parameter is false, it is the
 322  * caller's responsibility to take care of this.
 323  */
 324 static void __unregister_prot_hook(struct sock *sk, bool sync)
 325 {
 326         struct packet_sock *po = pkt_sk(sk);
 327 
 328         lockdep_assert_held_once(&po->bind_lock);
 329 
 330         po->running = 0;
 331 
 332         if (po->fanout)
 333                 __fanout_unlink(sk, po);
 334         else
 335                 __dev_remove_pack(&po->prot_hook);
 336 
 337         __sock_put(sk);
 338 
 339         if (sync) {
 340                 spin_unlock(&po->bind_lock);
 341                 synchronize_net();
 342                 spin_lock(&po->bind_lock);
 343         }
 344 }
 345 
 346 static void unregister_prot_hook(struct sock *sk, bool sync)
 347 {
 348         struct packet_sock *po = pkt_sk(sk);
 349 
 350         if (po->running)
 351                 __unregister_prot_hook(sk, sync);
 352 }
 353 
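     /* Ring memory may be kmalloc'ed or vmalloc'ed (see alloc_one_pg_vec_page),
      * so resolve the backing page accordingly.
      */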
 354 static inline struct page * __pure pgv_to_page(void *addr)
 355 {
 356         if (is_vmalloc_addr(addr))
 357                 return vmalloc_to_page(addr);
 358         return virt_to_page(addr);
 359 }
 360 
 361 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 362 {
 363         union tpacket_uhdr h;
 364 
 365         h.raw = frame;
 366         switch (po->tp_version) {
 367         case TPACKET_V1:
 368                 h.h1->tp_status = status;
 369                 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
 370                 break;
 371         case TPACKET_V2:
 372                 h.h2->tp_status = status;
 373                 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 374                 break;
 375         case TPACKET_V3:
 376                 h.h3->tp_status = status;
 377                 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
 378                 break;
 379         default:
 380                 WARN(1, "TPACKET version not supported.\n");
 381                 BUG();
 382         }
 383 
 384         smp_wmb();
 385 }
 386 
 387 static int __packet_get_status(const struct packet_sock *po, void *frame)
 388 {
 389         union tpacket_uhdr h;
 390 
 391         smp_rmb();
 392 
 393         h.raw = frame;
 394         switch (po->tp_version) {
 395         case TPACKET_V1:
 396                 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
 397                 return h.h1->tp_status;
 398         case TPACKET_V2:
 399                 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 400                 return h.h2->tp_status;
 401         case TPACKET_V3:
 402                 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
 403                 return h.h3->tp_status;
 404         default:
 405                 WARN(1, "TPACKET version not supported.\n");
 406                 BUG();
 407                 return 0;
 408         }
 409 }
 410 
 411 static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
 412                                    unsigned int flags)
 413 {
 414         struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 415 
 416         if (shhwtstamps &&
 417             (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 418             ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
 419                 return TP_STATUS_TS_RAW_HARDWARE;
 420 
 421         if (ktime_to_timespec_cond(skb->tstamp, ts))
 422                 return TP_STATUS_TS_SOFTWARE;
 423 
 424         return 0;
 425 }
 426 
 427 static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
 428                                     struct sk_buff *skb)
 429 {
 430         union tpacket_uhdr h;
 431         struct timespec ts;
 432         __u32 ts_status;
 433 
 434         if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
 435                 return 0;
 436 
 437         h.raw = frame;
 438         switch (po->tp_version) {
 439         case TPACKET_V1:
 440                 h.h1->tp_sec = ts.tv_sec;
 441                 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
 442                 break;
 443         case TPACKET_V2:
 444                 h.h2->tp_sec = ts.tv_sec;
 445                 h.h2->tp_nsec = ts.tv_nsec;
 446                 break;
 447         case TPACKET_V3:
 448                 h.h3->tp_sec = ts.tv_sec;
 449                 h.h3->tp_nsec = ts.tv_nsec;
 450                 break;
 451         default:
 452                 WARN(1, "TPACKET version not supported.\n");
 453                 BUG();
 454         }
 455 
 456         /* one flush is safe, as both fields always lie on the same cacheline */
 457         flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
 458         smp_wmb();
 459 
 460         return ts_status;
 461 }
 462 
 463 static void *packet_lookup_frame(const struct packet_sock *po,
 464                                  const struct packet_ring_buffer *rb,
 465                                  unsigned int position,
 466                                  int status)
 467 {
 468         unsigned int pg_vec_pos, frame_offset;
 469         union tpacket_uhdr h;
 470 
 471         pg_vec_pos = position / rb->frames_per_block;
 472         frame_offset = position % rb->frames_per_block;
 473 
 474         h.raw = rb->pg_vec[pg_vec_pos].buffer +
 475                 (frame_offset * rb->frame_size);
 476 
 477         if (status != __packet_get_status(po, h.raw))
 478                 return NULL;
 479 
 480         return h.raw;
 481 }
 482 
 483 static void *packet_current_frame(struct packet_sock *po,
 484                 struct packet_ring_buffer *rb,
 485                 int status)
 486 {
 487         return packet_lookup_frame(po, rb, rb->head, status);
 488 }
 489 
 490 static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
 491 {
 492         del_timer_sync(&pkc->retire_blk_timer);
 493 }
 494 
 495 static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
 496                 struct sk_buff_head *rb_queue)
 497 {
 498         struct tpacket_kbdq_core *pkc;
 499 
 500         pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
 501 
 502         spin_lock_bh(&rb_queue->lock);
 503         pkc->delete_blk_timer = 1;
 504         spin_unlock_bh(&rb_queue->lock);
 505 
 506         prb_del_retire_blk_timer(pkc);
 507 }
 508 
 509 static void prb_setup_retire_blk_timer(struct packet_sock *po)
 510 {
 511         struct tpacket_kbdq_core *pkc;
 512 
 513         pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
 514         timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
 515                     0);
 516         pkc->retire_blk_timer.expires = jiffies;
 517 }
 518 
 519 static int prb_calc_retire_blk_tmo(struct packet_sock *po,
 520                                 int blk_size_in_bytes)
 521 {
 522         struct net_device *dev;
 523         unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
 524         struct ethtool_link_ksettings ecmd;
 525         int err;
 526 
 527         rtnl_lock();
 528         dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
 529         if (unlikely(!dev)) {
 530                 rtnl_unlock();
 531                 return DEFAULT_PRB_RETIRE_TOV;
 532         }
 533         err = __ethtool_get_link_ksettings(dev, &ecmd);
 534         rtnl_unlock();
 535         if (!err) {
 536                 /* If the link speed is below 1Gbps (or unknown),
 537                  * the default retire timeout is good enough; above
 538                  * that, scale the timeout down with the link speed.
 539                  */
 540                 if (ecmd.base.speed < SPEED_1000 ||
 541                     ecmd.base.speed == SPEED_UNKNOWN) {
 542                         return DEFAULT_PRB_RETIRE_TOV;
 543                 } else {
 544                         msec = 1;
 545                         div = ecmd.base.speed / 1000;
 546                 }
 547         } else
 548                 return DEFAULT_PRB_RETIRE_TOV;
 549 
 550         mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
 551 
 552         if (div)
 553                 mbits /= div;
 554 
 555         tmo = mbits * msec;
 556 
 557         if (div)
 558                 return tmo+1;
 559         return tmo;
 560 }
 561 
 562 static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
 563                         union tpacket_req_u *req_u)
 564 {
 565         p1->feature_req_word = req_u->req3.tp_feature_req_word;
 566 }
 567 
 568 static void init_prb_bdqc(struct packet_sock *po,
 569                         struct packet_ring_buffer *rb,
 570                         struct pgv *pg_vec,
 571                         union tpacket_req_u *req_u)
 572 {
 573         struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
 574         struct tpacket_block_desc *pbd;
 575 
 576         memset(p1, 0x0, sizeof(*p1));
 577 
 578         p1->knxt_seq_num = 1;
 579         p1->pkbdq = pg_vec;
 580         pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
 581         p1->pkblk_start = pg_vec[0].buffer;
 582         p1->kblk_size = req_u->req3.tp_block_size;
 583         p1->knum_blocks = req_u->req3.tp_block_nr;
 584         p1->hdrlen = po->tp_hdrlen;
 585         p1->version = po->tp_version;
 586         p1->last_kactive_blk_num = 0;
 587         po->stats.stats3.tp_freeze_q_cnt = 0;
 588         if (req_u->req3.tp_retire_blk_tov)
 589                 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
 590         else
 591                 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
 592                                                 req_u->req3.tp_block_size);
 593         p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
 594         p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
 595 
 596         p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
 597         prb_init_ft_ops(p1, req_u);
 598         prb_setup_retire_blk_timer(po);
 599         prb_open_block(p1, pbd);
 600 }
 601 
 602 /* Do NOT update the last_kactive_blk_num first.
 603  * Assumes sk_buff_head lock is held.
 604  */
 605 static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
 606 {
 607         mod_timer(&pkc->retire_blk_timer,
 608                         jiffies + pkc->tov_in_jiffies);
 609         pkc->last_kactive_blk_num = pkc->kactive_blk_num;
 610 }
 611 
 612 /*
 613  * Timer logic:
 614  * 1) We refresh the timer only when we open a block.
 615  *    By doing this we don't waste cycles refreshing the timer
 616  *    on a packet-by-packet basis.
 617  *
 618  * With a 1MB block-size, on a 1Gbps line, it will take
 619  * ~8 ms to fill a block, plus memcpy time.
 620  *
 621  * So, if the user sets the 'tmo' to 10ms then the timer
 622  * will never fire while the block is still getting filled
 623  * (which is what we want). However, the user could choose
 624  * to close a block early and that's fine.
 625  *
 626  * But when the timer fires, we check whether or not to refresh it.
 627  * Since the tmo granularity is in msecs, it is not too expensive
 628  * to refresh the timer, approximately once every millisecond (just
 629  * like TCP's min RTO).
 630  */
 635 static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
 636 {
 637         struct packet_sock *po =
 638                 from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
 639         struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
 640         unsigned int frozen;
 641         struct tpacket_block_desc *pbd;
 642 
 643         spin_lock(&po->sk.sk_receive_queue.lock);
 644 
 645         frozen = prb_queue_frozen(pkc);
 646         pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
 647 
 648         if (unlikely(pkc->delete_blk_timer))
 649                 goto out;
 650 
 651         /* We only need to plug the race when the block is partially filled.
 652          * tpacket_rcv:
 653          *              lock(); increment BLOCK_NUM_PKTS; unlock()
 654          *              copy_bits() is in progress ...
 655          *              timer fires on other cpu:
 656          *              we can't retire the current block because copy_bits
 657          *              is in progress.
 658          *
 659          */
 660         if (BLOCK_NUM_PKTS(pbd)) {
 661                 while (atomic_read(&pkc->blk_fill_in_prog)) {
 662                         /* Waiting for skb_copy_bits to finish... */
 663                         cpu_relax();
 664                 }
 665         }
 666 
 667         if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
 668                 if (!frozen) {
 669                         if (!BLOCK_NUM_PKTS(pbd)) {
 670                                 /* An empty block. Just refresh the timer. */
 671                                 goto refresh_timer;
 672                         }
 673                         prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
 674                         if (!prb_dispatch_next_block(pkc, po))
 675                                 goto refresh_timer;
 676                         else
 677                                 goto out;
 678                 } else {
 679                         /* Case 1. Queue was frozen because user-space was
 680                          *         lagging behind.
 681                          */
 682                         if (prb_curr_blk_in_use(pbd)) {
 683                                 /*
 684                                  * Ok, user-space is still behind.
 685                                  * So just refresh the timer.
 686                                  */
 687                                 goto refresh_timer;
 688                         } else {
 689                                 /* Case 2. Queue was frozen, user-space caught up,
 690                                  * now the link went idle and the timer fired.
 691                                  * We don't have a block to close. So we open this
 692                                  * block and restart the timer.
 693                                  * Opening a block thaws the queue and restarts the
 694                                  * timer; thawing/timer-refresh is a side effect.
 695                                  */
 696                                 prb_open_block(pkc, pbd);
 697                                 goto out;
 698                         }
 699                 }
 700         }
 701 
 702 refresh_timer:
 703         _prb_refresh_rx_retire_blk_timer(pkc);
 704 
 705 out:
 706         spin_unlock(&po->sk.sk_receive_queue.lock);
 707 }
 708 
 709 static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
 710                 struct tpacket_block_desc *pbd1, __u32 status)
 711 {
 712         /* Flush everything minus the block header */
 713 
 714 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 715         u8 *start, *end;
 716 
 717         start = (u8 *)pbd1;
 718 
 719         /* Skip the block header (we know the header fits in 4K) */
 720         start += PAGE_SIZE;
 721 
 722         end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
 723         for (; start < end; start += PAGE_SIZE)
 724                 flush_dcache_page(pgv_to_page(start));
 725 
 726         smp_wmb();
 727 #endif
 728 
 729         /* Now update the block status. */
 730 
 731         BLOCK_STATUS(pbd1) = status;
 732 
 733         /* Flush the block header */
 734 
 735 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 736         start = (u8 *)pbd1;
 737         flush_dcache_page(pgv_to_page(start));
 738 
 739         smp_wmb();
 740 #endif
 741 }
 742 
 743 /*
 744  * Close the current block:
 745  * 1) stamp the block with the timestamp of the last packet (or the
 746  *    current time if the block is empty),
 747  * 2) mark it TP_STATUS_USER to hand it over to user-space,
 748  * 3) flush its cachelines,
 749  * 4) wake up readers via sk_data_ready(), and
 750  * 5) advance kactive_blk_num to the next block.
 751  */
 752 static void prb_close_block(struct tpacket_kbdq_core *pkc1,
 753                 struct tpacket_block_desc *pbd1,
 754                 struct packet_sock *po, unsigned int stat)
 755 {
 756         __u32 status = TP_STATUS_USER | stat;
 757 
 758         struct tpacket3_hdr *last_pkt;
 759         struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
 760         struct sock *sk = &po->sk;
 761 
 762         if (atomic_read(&po->tp_drops))
 763                 status |= TP_STATUS_LOSING;
 764 
 765         last_pkt = (struct tpacket3_hdr *)pkc1->prev;
 766         last_pkt->tp_next_offset = 0;
 767 
 768         /* Get the ts of the last pkt */
 769         if (BLOCK_NUM_PKTS(pbd1)) {
 770                 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
 771                 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
 772         } else {
 773                 /* Ok, we tmo'd - so get the current time.
 774                  *
 775                  * It shouldn't really happen as we don't close empty
 776                  * blocks. See prb_retire_rx_blk_timer_expired().
 777                  */
 778                 struct timespec ts;
 779                 getnstimeofday(&ts);
 780                 h1->ts_last_pkt.ts_sec = ts.tv_sec;
 781                 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
 782         }
 783 
 784         smp_wmb();
 785 
 786         /* Flush the block */
 787         prb_flush_block(pkc1, pbd1, status);
 788 
 789         sk->sk_data_ready(sk);
 790 
 791         pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
 792 }
 793 
 794 static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
 795 {
 796         pkc->reset_pending_on_curr_blk = 0;
 797 }
 798 
 799 /*
 800  * Side effect of opening a block:
 801  *
 802  * 1) prb_queue is thawed.
 803  * 2) retire_blk_timer is refreshed.
 804  *
 805  */
 806 static void prb_open_block(struct tpacket_kbdq_core *pkc1,
 807         struct tpacket_block_desc *pbd1)
 808 {
 809         struct timespec ts;
 810         struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
 811 
 812         smp_rmb();
 813 
 814         /* We could have just memset this but we will lose the
 815          * flexibility of making the priv area sticky
 816          */
 817 
 818         BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
 819         BLOCK_NUM_PKTS(pbd1) = 0;
 820         BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
 821 
 822         getnstimeofday(&ts);
 823 
 824         h1->ts_first_pkt.ts_sec = ts.tv_sec;
 825         h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
 826 
 827         pkc1->pkblk_start = (char *)pbd1;
 828         pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
 829 
 830         BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
 831         BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
 832 
 833         pbd1->version = pkc1->version;
 834         pkc1->prev = pkc1->nxt_offset;
 835         pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
 836 
 837         prb_thaw_queue(pkc1);
 838         _prb_refresh_rx_retire_blk_timer(pkc1);
 839 
 840         smp_wmb();
 841 }
 842 
 843 /*
 844  * Queue freeze logic:
 845  * 1) Assume tp_block_nr = 8 blocks.
 846  * 2) At time 't0', user opens Rx ring.
 847  * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
 848  * 4) user-space is either sleeping or processing block '0'.
 849  * 5) tpacket_rcv is currently filling block '7'; since there is no space
 850  *    left, it will close block-7, loop around and try to fill block '0'.
 851  *    (__packet_lookup_frame_in_block -> prb_retire_current_block() ->
 852  *     prb_dispatch_next_block(), where BLOCK_STATUS == USER evaluates
 853  *     to true.)
 854  *    5.1) Since block-0 is currently in-use, we just freeze the queue.
 855  * 6) Now there are two cases:
 856  *    6.1) Link goes idle right after the queue is frozen.
 857  *         But remember, the last open_block() refreshed the timer.
 858  *         When this timer expires, it will refresh itself so that we can
 859  *         re-open block-0 in the near future.
 860  *    6.2) Link is busy and keeps on receiving packets. This is a simple
 861  *         case and __packet_lookup_frame_in_block will check if block-0
 862  *         is free and can now be re-used.
 863  */
 865 
 866 static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
 867                                   struct packet_sock *po)
 868 {
 869         pkc->reset_pending_on_curr_blk = 1;
 870         po->stats.stats3.tp_freeze_q_cnt++;
 871 }
 872 
 873 #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
 874 
 875 /*
 876  * If the next block is free then we will dispatch it
 877  * and return a good offset.
 878  * Else, we will freeze the queue.
 879  * So, caller must check the return value.
 880  */
 881 static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
 882                 struct packet_sock *po)
 883 {
 884         struct tpacket_block_desc *pbd;
 885 
 886         smp_rmb();
 887 
 888         /* 1. Get current block num */
 889         pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
 890 
 891         /* 2. If this block is currently in_use then freeze the queue */
 892         if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
 893                 prb_freeze_queue(pkc, po);
 894                 return NULL;
 895         }
 896 
 897         /*
 898          * 3.
 899          * open this block and return the offset where the first packet
 900          * needs to get stored.
 901          */
 902         prb_open_block(pkc, pbd);
 903         return (void *)pkc->nxt_offset;
 904 }
 905 
 906 static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
 907                 struct packet_sock *po, unsigned int status)
 908 {
 909         struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
 910 
 911         /* retire/close the current block */
 912         if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
 913                 /*
 914                  * Plug the case where copy_bits() is in progress on
 915                  * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
 916                  * have space to copy the pkt in the current block and
 917                  * called prb_retire_current_block()
 918                  *
 919                  * We don't need to worry about the TMO case because
 920                  * the timer-handler already handled this case.
 921                  */
 922                 if (!(status & TP_STATUS_BLK_TMO)) {
 923                         while (atomic_read(&pkc->blk_fill_in_prog)) {
 924                                 /* Waiting for skb_copy_bits to finish... */
 925                                 cpu_relax();
 926                         }
 927                 }
 928                 prb_close_block(pkc, pbd, po, status);
 929                 return;
 930         }
 931 }
 932 
 933 static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
 934 {
 935         return TP_STATUS_USER & BLOCK_STATUS(pbd);
 936 }
 937 
 938 static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
 939 {
 940         return pkc->reset_pending_on_curr_blk;
 941 }
 942 
 943 static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
 944 {
 945         struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
 946         atomic_dec(&pkc->blk_fill_in_prog);
 947 }
 948 
 949 static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
 950                         struct tpacket3_hdr *ppd)
 951 {
 952         ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
 953 }
 954 
 955 static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
 956                         struct tpacket3_hdr *ppd)
 957 {
 958         ppd->hv1.tp_rxhash = 0;
 959 }
 960 
 961 static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
 962                         struct tpacket3_hdr *ppd)
 963 {
 964         if (skb_vlan_tag_present(pkc->skb)) {
 965                 ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
 966                 ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
 967                 ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
 968         } else {
 969                 ppd->hv1.tp_vlan_tci = 0;
 970                 ppd->hv1.tp_vlan_tpid = 0;
 971                 ppd->tp_status = TP_STATUS_AVAILABLE;
 972         }
 973 }
 974 
 975 static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
 976                         struct tpacket3_hdr *ppd)
 977 {
 978         ppd->hv1.tp_padding = 0;
 979         prb_fill_vlan_info(pkc, ppd);
 980 
 981         if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
 982                 prb_fill_rxhash(pkc, ppd);
 983         else
 984                 prb_clear_rxhash(pkc, ppd);
 985 }
 986 
 987 static void prb_fill_curr_block(char *curr,
 988                                 struct tpacket_kbdq_core *pkc,
 989                                 struct tpacket_block_desc *pbd,
 990                                 unsigned int len)
 991 {
 992         struct tpacket3_hdr *ppd;
 993 
 994         ppd  = (struct tpacket3_hdr *)curr;
 995         ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
 996         pkc->prev = curr;
 997         pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
 998         BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
 999         BLOCK_NUM_PKTS(pbd) += 1;
1000         atomic_inc(&pkc->blk_fill_in_prog);
1001         prb_run_all_ft_ops(pkc, ppd);
1002 }
1003 
1004 /* Assumes caller has the sk->rx_queue.lock */
1005 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1006                                             struct sk_buff *skb,
1007                                             unsigned int len)
1009 {
1010         struct tpacket_kbdq_core *pkc;
1011         struct tpacket_block_desc *pbd;
1012         char *curr, *end;
1013 
1014         pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
1015         pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1016 
1017         /* Queue is frozen when user space is lagging behind */
1018         if (prb_queue_frozen(pkc)) {
1019                 /*
1020                  * Check if that last block which caused the queue to freeze
1021                  * is still in_use by user-space.
1022                  */
1023                 if (prb_curr_blk_in_use(pbd)) {
1024                         /* Can't record this packet */
1025                         return NULL;
1026                 } else {
1027                         /*
1028                          * Ok, the block was released by user-space.
1029                          * Now let's open that block.
1030                          * Opening a block also thaws the queue;
1031                          * thawing is a side effect.
1032                          */
1033                         prb_open_block(pkc, pbd);
1034                 }
1035         }
1036 
1037         smp_mb();
1038         curr = pkc->nxt_offset;
1039         pkc->skb = skb;
1040         end = (char *)pbd + pkc->kblk_size;
1041 
1042         /* first try the current block */
1043         if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1044                 prb_fill_curr_block(curr, pkc, pbd, len);
1045                 return (void *)curr;
1046         }
1047 
1048         /* Ok, close the current block */
1049         prb_retire_current_block(pkc, po, 0);
1050 
1051         /* Now, try to dispatch the next block */
1052         curr = (char *)prb_dispatch_next_block(pkc, po);
1053         if (curr) {
1054                 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1055                 prb_fill_curr_block(curr, pkc, pbd, len);
1056                 return (void *)curr;
1057         }
1058 
1059         /*
1060          * No free blocks are available. User-space hasn't caught up yet.
1061          * The queue was just frozen and now this packet will get dropped.
1062          */
1063         return NULL;
1064 }
1065 
1066 static void *packet_current_rx_frame(struct packet_sock *po,
1067                                             struct sk_buff *skb,
1068                                             int status, unsigned int len)
1069 {
1070         char *curr = NULL;
1071         switch (po->tp_version) {
1072         case TPACKET_V1:
1073         case TPACKET_V2:
1074                 curr = packet_lookup_frame(po, &po->rx_ring,
1075                                         po->rx_ring.head, status);
1076                 return curr;
1077         case TPACKET_V3:
1078                 return __packet_lookup_frame_in_block(po, skb, len);
1079         default:
1080                 WARN(1, "TPACKET version not supported\n");
1081                 BUG();
1082                 return NULL;
1083         }
1084 }
1085 
1086 static void *prb_lookup_block(const struct packet_sock *po,
1087                               const struct packet_ring_buffer *rb,
1088                               unsigned int idx,
1089                               int status)
1090 {
1091         struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
1092         struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1093 
1094         if (status != BLOCK_STATUS(pbd))
1095                 return NULL;
1096         return pbd;
1097 }
1098 
1099 static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1100 {
1101         unsigned int prev;
1102         if (rb->prb_bdqc.kactive_blk_num)
1103                 prev = rb->prb_bdqc.kactive_blk_num-1;
1104         else
1105                 prev = rb->prb_bdqc.knum_blocks-1;
1106         return prev;
1107 }
1108 
1109 /* Assumes caller has held the rx_queue.lock */
1110 static void *__prb_previous_block(struct packet_sock *po,
1111                                          struct packet_ring_buffer *rb,
1112                                          int status)
1113 {
1114         unsigned int previous = prb_previous_blk_num(rb);
1115         return prb_lookup_block(po, rb, previous, status);
1116 }
1117 
1118 static void *packet_previous_rx_frame(struct packet_sock *po,
1119                                              struct packet_ring_buffer *rb,
1120                                              int status)
1121 {
1122         if (po->tp_version <= TPACKET_V2)
1123                 return packet_previous_frame(po, rb, status);
1124 
1125         return __prb_previous_block(po, rb, status);
1126 }
1127 
1128 static void packet_increment_rx_head(struct packet_sock *po,
1129                                             struct packet_ring_buffer *rb)
1130 {
1131         switch (po->tp_version) {
1132         case TPACKET_V1:
1133         case TPACKET_V2:
1134                 return packet_increment_head(rb);
1135         case TPACKET_V3:
1136         default:
1137                 WARN(1, "TPACKET version not supported.\n");
1138                 BUG();
1139                 return;
1140         }
1141 }
1142 
1143 static void *packet_previous_frame(struct packet_sock *po,
1144                 struct packet_ring_buffer *rb,
1145                 int status)
1146 {
1147         unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1148         return packet_lookup_frame(po, rb, previous, status);
1149 }
1150 
1151 static void packet_increment_head(struct packet_ring_buffer *buff)
1152 {
1153         buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1154 }
1155 
1156 static void packet_inc_pending(struct packet_ring_buffer *rb)
1157 {
1158         this_cpu_inc(*rb->pending_refcnt);
1159 }
1160 
1161 static void packet_dec_pending(struct packet_ring_buffer *rb)
1162 {
1163         this_cpu_dec(*rb->pending_refcnt);
1164 }
1165 
1166 static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
1167 {
1168         unsigned int refcnt = 0;
1169         int cpu;
1170 
1171         /* We don't use pending refcount in rx_ring. */
1172         if (rb->pending_refcnt == NULL)
1173                 return 0;
1174 
1175         for_each_possible_cpu(cpu)
1176                 refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
1177 
1178         return refcnt;
1179 }
1180 
1181 static int packet_alloc_pending(struct packet_sock *po)
1182 {
1183         po->rx_ring.pending_refcnt = NULL;
1184 
1185         po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
1186         if (unlikely(po->tx_ring.pending_refcnt == NULL))
1187                 return -ENOBUFS;
1188 
1189         return 0;
1190 }
1191 
1192 static void packet_free_pending(struct packet_sock *po)
1193 {
1194         free_percpu(po->tx_ring.pending_refcnt);
1195 }
1196 
1197 #define ROOM_POW_OFF    2
1198 #define ROOM_NONE       0x0
1199 #define ROOM_LOW        0x1
1200 #define ROOM_NORMAL     0x2
1201 
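     /* Room check: the frame len>>pow_off slots ahead of head must still be
      * owned by the kernel; with ROOM_POW_OFF this approximates at least a
      * quarter of the ring being free.
      */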
1202 static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
1203 {
1204         int idx, len;
1205 
1206         len = READ_ONCE(po->rx_ring.frame_max) + 1;
1207         idx = READ_ONCE(po->rx_ring.head);
1208         if (pow_off)
1209                 idx += len >> pow_off;
1210         if (idx >= len)
1211                 idx -= len;
1212         return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1213 }
1214 
1215 static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
1216 {
1217         int idx, len;
1218 
1219         len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
1220         idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
1221         if (pow_off)
1222                 idx += len >> pow_off;
1223         if (idx >= len)
1224                 idx -= len;
1225         return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1226 }
1227 
1228 static int __packet_rcv_has_room(const struct packet_sock *po,
1229                                  const struct sk_buff *skb)
1230 {
1231         const struct sock *sk = &po->sk;
1232         int ret = ROOM_NONE;
1233 
1234         if (po->prot_hook.func != tpacket_rcv) {
1235                 int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1236                 int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1237                                    - (skb ? skb->truesize : 0);
1238 
1239                 if (avail > (rcvbuf >> ROOM_POW_OFF))
1240                         return ROOM_NORMAL;
1241                 else if (avail > 0)
1242                         return ROOM_LOW;
1243                 else
1244                         return ROOM_NONE;
1245         }
1246 
1247         if (po->tp_version == TPACKET_V3) {
1248                 if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1249                         ret = ROOM_NORMAL;
1250                 else if (__tpacket_v3_has_room(po, 0))
1251                         ret = ROOM_LOW;
1252         } else {
1253                 if (__tpacket_has_room(po, ROOM_POW_OFF))
1254                         ret = ROOM_NORMAL;
1255                 else if (__tpacket_has_room(po, 0))
1256                         ret = ROOM_LOW;
1257         }
1258 
1259         return ret;
1260 }
1261 
1262 static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1263 {
1264         int pressure, ret;
1265 
1266         ret = __packet_rcv_has_room(po, skb);
1267         pressure = ret != ROOM_NORMAL;
1268 
1269         if (READ_ONCE(po->pressure) != pressure)
1270                 WRITE_ONCE(po->pressure, pressure);
1271 
1272         return ret;
1273 }
1274 
1275 static void packet_rcv_try_clear_pressure(struct packet_sock *po)
1276 {
1277         if (READ_ONCE(po->pressure) &&
1278             __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
1279                 WRITE_ONCE(po->pressure,  0);
1280 }
1281 
1282 static void packet_sock_destruct(struct sock *sk)
1283 {
1284         skb_queue_purge(&sk->sk_error_queue);
1285 
1286         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1287         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
1288 
1289         if (!sock_flag(sk, SOCK_DEAD)) {
1290                 pr_err("Attempt to release alive packet socket: %p\n", sk);
1291                 return;
1292         }
1293 
1294         sk_refcnt_debug_dec(sk);
1295 }
1296 
1297 static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
1298 {
1299         u32 *history = po->rollover->history;
1300         u32 victim, rxhash;
1301         int i, count = 0;
1302 
1303         rxhash = skb_get_hash(skb);
1304         for (i = 0; i < ROLLOVER_HLEN; i++)
1305                 if (READ_ONCE(history[i]) == rxhash)
1306                         count++;
1307 
1308         victim = prandom_u32() % ROLLOVER_HLEN;
1309 
1310         /* Avoid dirtying the cache line if possible */
1311         if (READ_ONCE(history[victim]) != rxhash)
1312                 WRITE_ONCE(history[victim], rxhash);
1313 
1314         return count > (ROLLOVER_HLEN >> 1);
1315 }
1316 
1317 static unsigned int fanout_demux_hash(struct packet_fanout *f,
1318                                       struct sk_buff *skb,
1319                                       unsigned int num)
1320 {
1321         return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
1322 }
1323 
1324 static unsigned int fanout_demux_lb(struct packet_fanout *f,
1325                                     struct sk_buff *skb,
1326                                     unsigned int num)
1327 {
1328         unsigned int val = atomic_inc_return(&f->rr_cur);
1329 
1330         return val % num;
1331 }
1332 
1333 static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1334                                      struct sk_buff *skb,
1335                                      unsigned int num)
1336 {
1337         return smp_processor_id() % num;
1338 }
1339 
1340 static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1341                                      struct sk_buff *skb,
1342                                      unsigned int num)
1343 {
1344         return prandom_u32_max(num);
1345 }
1346 
1347 static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1348                                           struct sk_buff *skb,
1349                                           unsigned int idx, bool try_self,
1350                                           unsigned int num)
1351 {
1352         struct packet_sock *po, *po_next, *po_skip = NULL;
1353         unsigned int i, j, room = ROOM_NONE;
1354 
1355         po = pkt_sk(f->arr[idx]);
1356 
1357         if (try_self) {
1358                 room = packet_rcv_has_room(po, skb);
1359                 if (room == ROOM_NORMAL ||
1360                     (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
1361                         return idx;
1362                 po_skip = po;
1363         }
1364 
1365         i = j = min_t(int, po->rollover->sock, num - 1);
1366         do {
1367                 po_next = pkt_sk(f->arr[i]);
1368                 if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
1369                     packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
1370                         if (i != j)
1371                                 po->rollover->sock = i;
1372                         atomic_long_inc(&po->rollover->num);
1373                         if (room == ROOM_LOW)
1374                                 atomic_long_inc(&po->rollover->num_huge);
1375                         return i;
1376                 }
1377 
1378                 if (++i == num)
1379                         i = 0;
1380         } while (i != j);
1381 
1382         atomic_long_inc(&po->rollover->num_failed);
1383         return idx;
1384 }
1385 
1386 static unsigned int fanout_demux_qm(struct packet_fanout *f,
1387                                     struct sk_buff *skb,
1388                                     unsigned int num)
1389 {
1390         return skb_get_queue_mapping(skb) % num;
1391 }
1392 
1393 static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1394                                      struct sk_buff *skb,
1395                                      unsigned int num)
1396 {
1397         struct bpf_prog *prog;
1398         unsigned int ret = 0;
1399 
1400         rcu_read_lock();
1401         prog = rcu_dereference(f->bpf_prog);
1402         if (prog)
1403                 ret = bpf_prog_run_clear_cb(prog, skb) % num;
1404         rcu_read_unlock();
1405 
1406         return ret;
1407 }
1408 
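     /* Fanout flags live in the high byte of the PACKET_FANOUT value;
      * f->flags stores them already shifted down, hence the >> 8.
      */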
1409 static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1410 {
1411         return f->flags & (flag >> 8);
1412 }
1413 
1414 static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1415                              struct packet_type *pt, struct net_device *orig_dev)
1416 {
1417         struct packet_fanout *f = pt->af_packet_priv;
1418         unsigned int num = READ_ONCE(f->num_members);
1419         struct net *net = read_pnet(&f->net);
1420         struct packet_sock *po;
1421         unsigned int idx;
1422 
1423         if (!net_eq(dev_net(dev), net) || !num) {
1424                 kfree_skb(skb);
1425                 return 0;
1426         }
1427 
1428         if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1429                 skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
1430                 if (!skb)
1431                         return 0;
1432         }
1433         switch (f->type) {
1434         case PACKET_FANOUT_HASH:
1435         default:
1436                 idx = fanout_demux_hash(f, skb, num);
1437                 break;
1438         case PACKET_FANOUT_LB:
1439                 idx = fanout_demux_lb(f, skb, num);
1440                 break;
1441         case PACKET_FANOUT_CPU:
1442                 idx = fanout_demux_cpu(f, skb, num);
1443                 break;
1444         case PACKET_FANOUT_RND:
1445                 idx = fanout_demux_rnd(f, skb, num);
1446                 break;
1447         case PACKET_FANOUT_QM:
1448                 idx = fanout_demux_qm(f, skb, num);
1449                 break;
1450         case PACKET_FANOUT_ROLLOVER:
1451                 idx = fanout_demux_rollover(f, skb, 0, false, num);
1452                 break;
1453         case PACKET_FANOUT_CBPF:
1454         case PACKET_FANOUT_EBPF:
1455                 idx = fanout_demux_bpf(f, skb, num);
1456                 break;
1457         }
1458 
1459         if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
1460                 idx = fanout_demux_rollover(f, skb, idx, true, num);
1461 
1462         po = pkt_sk(f->arr[idx]);
1463         return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1464 }
1465 
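     /* fanout_mutex protects fanout_list and fanout_next_id. */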
1466 DEFINE_MUTEX(fanout_mutex);
1467 EXPORT_SYMBOL_GPL(fanout_mutex);
1468 static LIST_HEAD(fanout_list);
1469 static u16 fanout_next_id;
1470 
1471 static void __fanout_link(struct sock *sk, struct packet_sock *po)
1472 {
1473         struct packet_fanout *f = po->fanout;
1474 
1475         spin_lock(&f->lock);
1476         f->arr[f->num_members] = sk;
1477         smp_wmb();
1478         f->num_members++;
1479         if (f->num_members == 1)
1480                 dev_add_pack(&f->prot_hook);
1481         spin_unlock(&f->lock);
1482 }
1483 
1484 static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1485 {
1486         struct packet_fanout *f = po->fanout;
1487         int i;
1488 
1489         spin_lock(&f->lock);
1490         for (i = 0; i < f->num_members; i++) {
1491                 if (f->arr[i] == sk)
1492                         break;
1493         }
1494         BUG_ON(i >= f->num_members);
1495         f->arr[i] = f->arr[f->num_members - 1];
1496         f->num_members--;
1497         if (f->num_members == 0)
1498                 __dev_remove_pack(&f->prot_hook);
1499         spin_unlock(&f->lock);
1500 }
1501 
1502 static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1503 {
1504         if (sk->sk_family != PF_PACKET)
1505                 return false;
1506 
1507         return ptype->af_packet_priv == pkt_sk(sk)->fanout;
1508 }
1509 
1510 static void fanout_init_data(struct packet_fanout *f)
1511 {
1512         switch (f->type) {
1513         case PACKET_FANOUT_LB:
1514                 atomic_set(&f->rr_cur, 0);
1515                 break;
1516         case PACKET_FANOUT_CBPF:
1517         case PACKET_FANOUT_EBPF:
1518                 RCU_INIT_POINTER(f->bpf_prog, NULL);
1519                 break;
1520         }
1521 }
1522 
1523 static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
1524 {
1525         struct bpf_prog *old;
1526 
1527         spin_lock(&f->lock);
1528         old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
1529         rcu_assign_pointer(f->bpf_prog, new);
1530         spin_unlock(&f->lock);
1531 
1532         if (old) {
1533                 synchronize_net();
1534                 bpf_prog_destroy(old);
1535         }
1536 }
1537 
1538 static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1539                                 unsigned int len)
1540 {
1541         struct bpf_prog *new;
1542         struct sock_fprog fprog;
1543         int ret;
1544 
1545         if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1546                 return -EPERM;
1547         if (len != sizeof(fprog))
1548                 return -EINVAL;
1549         if (copy_from_user(&fprog, data, len))
1550                 return -EFAULT;
1551 
1552         ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
1553         if (ret)
1554                 return ret;
1555 
1556         __fanout_set_data_bpf(po->fanout, new);
1557         return 0;
1558 }
1559 
1560 static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
1561                                 unsigned int len)
1562 {
1563         struct bpf_prog *new;
1564         u32 fd;
1565 
1566         if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1567                 return -EPERM;
1568         if (len != sizeof(fd))
1569                 return -EINVAL;
1570         if (copy_from_user(&fd, data, len))
1571                 return -EFAULT;
1572 
1573         new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
1574         if (IS_ERR(new))
1575                 return PTR_ERR(new);
1576 
1577         __fanout_set_data_bpf(po->fanout, new);
1578         return 0;
1579 }
1580 
1581 static int fanout_set_data(struct packet_sock *po, char __user *data,
1582                            unsigned int len)
1583 {
1584         switch (po->fanout->type) {
1585         case PACKET_FANOUT_CBPF:
1586                 return fanout_set_data_cbpf(po, data, len);
1587         case PACKET_FANOUT_EBPF:
1588                 return fanout_set_data_ebpf(po, data, len);
1589         default:
1590                 return -EINVAL;
1591         }
1592 }
1593 
1594 static void fanout_release_data(struct packet_fanout *f)
1595 {
1596         switch (f->type) {
1597         case PACKET_FANOUT_CBPF:
1598         case PACKET_FANOUT_EBPF:
1599                 __fanout_set_data_bpf(f, NULL);
1600         }
1601 }
1602 
1603 static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
1604 {
1605         struct packet_fanout *f;
1606 
1607         list_for_each_entry(f, &fanout_list, list) {
1608                 if (f->id == candidate_id &&
1609                     read_pnet(&f->net) == sock_net(sk)) {
1610                         return false;
1611                 }
1612         }
1613         return true;
1614 }
1615 
1616 static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
1617 {
1618         u16 id = fanout_next_id;
1619 
1620         do {
1621                 if (__fanout_id_is_free(sk, id)) {
1622                         *new_id = id;
1623                         fanout_next_id = id + 1;
1624                         return true;
1625                 }
1626 
1627                 id++;
1628         } while (id != fanout_next_id);
1629 
1630         return false;
1631 }
1632 
1633 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1634 {
1635         struct packet_rollover *rollover = NULL;
1636         struct packet_sock *po = pkt_sk(sk);
1637         struct packet_fanout *f, *match;
1638         u8 type = type_flags & 0xff;
1639         u8 flags = type_flags >> 8;
1640         int err;
1641 
1642         switch (type) {
1643         case PACKET_FANOUT_ROLLOVER:
1644                 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1645                         return -EINVAL;
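                     /* fall through */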
1646         case PACKET_FANOUT_HASH:
1647         case PACKET_FANOUT_LB:
1648         case PACKET_FANOUT_CPU:
1649         case PACKET_FANOUT_RND:
1650         case PACKET_FANOUT_QM:
1651         case PACKET_FANOUT_CBPF:
1652         case PACKET_FANOUT_EBPF:
1653                 break;
1654         default:
1655                 return -EINVAL;
1656         }
1657 
1658         mutex_lock(&fanout_mutex);
1659 
1660         err = -EALREADY;
1661         if (po->fanout)
1662                 goto out;
1663 
1664         if (type == PACKET_FANOUT_ROLLOVER ||
1665             (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
1666                 err = -ENOMEM;
1667                 rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
1668                 if (!rollover)
1669                         goto out;
1670                 atomic_long_set(&rollover->num, 0);
1671                 atomic_long_set(&rollover->num_huge, 0);
1672                 atomic_long_set(&rollover->num_failed, 0);
1673         }
1674 
1675         if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
1676                 if (id != 0) {
1677                         err = -EINVAL;
1678                         goto out;
1679                 }
1680                 if (!fanout_find_new_id(sk, &id)) {
1681                         err = -ENOMEM;
1682                         goto out;
1683                 }
1684                 /* ephemeral flag for the first socket in the group: drop it */
1685                 flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
1686         }
1687 
1688         match = NULL;
1689         list_for_each_entry(f, &fanout_list, list) {
1690                 if (f->id == id &&
1691                     read_pnet(&f->net) == sock_net(sk)) {
1692                         match = f;
1693                         break;
1694                 }
1695         }
1696         err = -EINVAL;
1697         if (match && match->flags != flags)
1698                 goto out;
1699         if (!match) {
1700                 err = -ENOMEM;
1701                 match = kzalloc(sizeof(*match), GFP_KERNEL);
1702                 if (!match)
1703                         goto out;
1704                 write_pnet(&match->net, sock_net(sk));
1705                 match->id = id;
1706                 match->type = type;
1707                 match->flags = flags;
1708                 INIT_LIST_HEAD(&match->list);
1709                 spin_lock_init(&match->lock);
1710                 refcount_set(&match->sk_ref, 0);
1711                 fanout_init_data(match);
1712                 match->prot_hook.type = po->prot_hook.type;
1713                 match->prot_hook.dev = po->prot_hook.dev;
1714                 match->prot_hook.func = packet_rcv_fanout;
1715                 match->prot_hook.af_packet_priv = match;
1716                 match->prot_hook.id_match = match_fanout_group;
1717                 list_add(&match->list, &fanout_list);
1718         }
1719         err = -EINVAL;
1720 
1721         spin_lock(&po->bind_lock);
1722         if (po->running &&
1723             match->type == type &&
1724             match->prot_hook.type == po->prot_hook.type &&
1725             match->prot_hook.dev == po->prot_hook.dev) {
1726                 err = -ENOSPC;
1727                 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1728                         __dev_remove_pack(&po->prot_hook);
1729                         po->fanout = match;
1730                         po->rollover = rollover;
1731                         rollover = NULL;
1732                         refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
1733                         __fanout_link(sk, po);
1734                         err = 0;
1735                 }
1736         }
1737         spin_unlock(&po->bind_lock);
1738 
1739         if (err && !refcount_read(&match->sk_ref)) {
1740                 list_del(&match->list);
1741                 kfree(match);
1742         }
1743 
1744 out:
1745         kfree(rollover);
1746         mutex_unlock(&fanout_mutex);
1747         return err;
1748 }
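
     /* Usage sketch (userspace, not part of this file): a socket joins a
      * fanout group with setsockopt(). The lower 16 bits of the integer
      * argument carry the group id matched above, the upper 16 bits the
      * mode and flags:
      *
      *   int arg = group_id | (PACKET_FANOUT_HASH << 16);
      *   setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &arg, sizeof(arg));
      */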
1749 
1750 /* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
1751  * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
1752  * It is the responsibility of the caller to call fanout_release_data() and
1753  * free the returned packet_fanout (after synchronize_net())
1754  */
1755 static struct packet_fanout *fanout_release(struct sock *sk)
1756 {
1757         struct packet_sock *po = pkt_sk(sk);
1758         struct packet_fanout *f;
1759 
1760         mutex_lock(&fanout_mutex);
1761         f = po->fanout;
1762         if (f) {
1763                 po->fanout = NULL;
1764 
1765                 if (refcount_dec_and_test(&f->sk_ref))
1766                         list_del(&f->list);
1767                 else
1768                         f = NULL;
1769         }
1770         mutex_unlock(&fanout_mutex);
1771 
1772         return f;
1773 }
1774 
1775 static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1776                                           struct sk_buff *skb)
1777 {
1778         /* Earlier code assumed this would be a VLAN pkt, double-check
1779          * this now that we have the actual packet in hand. We can only
1780          * do this check on Ethernet devices.
1781          */
1782         if (unlikely(dev->type != ARPHRD_ETHER))
1783                 return false;
1784 
1785         skb_reset_mac_header(skb);
1786         return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1787 }
1788 
1789 static const struct proto_ops packet_ops;
1790 
1791 static const struct proto_ops packet_ops_spkt;
1792 
1793 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1794                            struct packet_type *pt, struct net_device *orig_dev)
1795 {
1796         struct sock *sk;
1797         struct sockaddr_pkt *spkt;
1798 
1799         /*
1800          *      When we registered the protocol we saved the socket in the data
1801          *      field for just this event.
1802          */
1803 
1804         sk = pt->af_packet_priv;
1805 
1806         /*
1807          *      Yank back the headers [hope the device set this
1808          *      right or kerboom...]
1809          *
1810          *      Incoming packets have ll header pulled,
1811          *      push it back.
1812          *
1813          *      For outgoing ones skb->data == skb_mac_header(skb)
1814          *      so that this procedure is noop.
1815          */
1816 
1817         if (skb->pkt_type == PACKET_LOOPBACK)
1818                 goto out;
1819 
1820         if (!net_eq(dev_net(dev), sock_net(sk)))
1821                 goto out;
1822 
1823         skb = skb_share_check(skb, GFP_ATOMIC);
1824         if (skb == NULL)
1825                 goto oom;
1826 
1827         /* drop any routing info */
1828         skb_dst_drop(skb);
1829 
1830         /* drop conntrack reference */
1831         nf_reset_ct(skb);
1832 
1833         spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1834 
1835         skb_push(skb, skb->data - skb_mac_header(skb));
1836 
1837         /*
1838          *      The SOCK_PACKET socket receives _all_ frames.
1839          */
1840 
1841         spkt->spkt_family = dev->type;
1842         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1843         spkt->spkt_protocol = skb->protocol;
1844 
1845         /*
1846          *      Charge the memory to the socket. This is done specifically
1847          *      to prevent sockets using all the memory up.
1848          */
1849 
1850         if (sock_queue_rcv_skb(sk, skb) == 0)
1851                 return 0;
1852 
1853 out:
1854         kfree_skb(skb);
1855 oom:
1856         return 0;
1857 }
1858 
1859 static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
1860 {
1861         if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
1862             sock->type == SOCK_RAW) {
1863                 skb_reset_mac_header(skb);
1864                 skb->protocol = dev_parse_header_protocol(skb);
1865         }
1866 
1867         skb_probe_transport_header(skb);
1868 }
1869 
1870 
1871 /*
1872  *      Output a raw packet to a device layer. This bypasses all the other
1873  *      protocol layers and you must therefore supply it with a complete frame
1874  */
1875 static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
1876                                size_t len)
1877 {
1878         struct sock *sk = sock->sk;
1879         DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
1880         struct sk_buff *skb = NULL;
1881         struct net_device *dev;
1882         struct sockcm_cookie sockc;
1883         __be16 proto = 0;
1884         int err;
1885         int extra_len = 0;
1886 
1887         /*
1888          *      Get and verify the address.
1889          */
1890 
1891         if (saddr) {
1892                 if (msg->msg_namelen < sizeof(struct sockaddr))
1893                         return -EINVAL;
1894                 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
1895                         proto = saddr->spkt_protocol;
1896         } else
1897                 return -ENOTCONN;       /* SOCK_PACKET must be sent giving an address */
1898 
1899         /*
1900          *      Find the device first to size check it
1901          */
1902 
1903         saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
1904 retry:
1905         rcu_read_lock();
1906         dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
1907         err = -ENODEV;
1908         if (dev == NULL)
1909                 goto out_unlock;
1910 
1911         err = -ENETDOWN;
1912         if (!(dev->flags & IFF_UP))
1913                 goto out_unlock;
1914 
1915         /*
1916          * You may not queue a frame bigger than the mtu. This is the lowest level
1917          * raw protocol and you must do your own fragmentation at this level.
1918          */
1919 
1920         if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
1921                 if (!netif_supports_nofcs(dev)) {
1922                         err = -EPROTONOSUPPORT;
1923                         goto out_unlock;
1924                 }
1925                 extra_len = 4; /* We're doing our own CRC */
1926         }
1927 
1928         err = -EMSGSIZE;
1929         if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
1930                 goto out_unlock;
1931 
1932         if (!skb) {
1933                 size_t reserved = LL_RESERVED_SPACE(dev);
1934                 int tlen = dev->needed_tailroom;
1935                 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
1936 
1937                 rcu_read_unlock();
1938                 skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
1939                 if (skb == NULL)
1940                         return -ENOBUFS;
1941                 /* FIXME: Save some space for broken drivers that write a hard
1942                  * header at transmission time by themselves. PPP is the notable
1943                  * one here. This should really be fixed at the driver level.
1944                  */
1945                 skb_reserve(skb, reserved);
1946                 skb_reset_network_header(skb);
1947 
1948                 /* Try to align data part correctly */
1949                 if (hhlen) {
1950                         skb->data -= hhlen;
1951                         skb->tail -= hhlen;
1952                         if (len < hhlen)
1953                                 skb_reset_network_header(skb);
1954                 }
1955                 err = memcpy_from_msg(skb_put(skb, len), msg, len);
1956                 if (err)
1957                         goto out_free;
1958                 goto retry;
1959         }
1960 
1961         if (!dev_validate_header(dev, skb->data, len)) {
1962                 err = -EINVAL;
1963                 goto out_unlock;
1964         }
1965         if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1966             !packet_extra_vlan_len_allowed(dev, skb)) {
1967                 err = -EMSGSIZE;
1968                 goto out_unlock;
1969         }
1970 
1971         sockcm_init(&sockc, sk);
1972         if (msg->msg_controllen) {
1973                 err = sock_cmsg_send(sk, msg, &sockc);
1974                 if (unlikely(err))
1975                         goto out_unlock;
1976         }
1977 
1978         skb->protocol = proto;
1979         skb->dev = dev;
1980         skb->priority = sk->sk_priority;
1981         skb->mark = sk->sk_mark;
1982         skb->tstamp = sockc.transmit_time;
1983 
1984         skb_setup_tx_timestamp(skb, sockc.tsflags);
1985 
1986         if (unlikely(extra_len == 4))
1987                 skb->no_fcs = 1;
1988 
1989         packet_parse_headers(skb, sock);
1990 
1991         dev_queue_xmit(skb);
1992         rcu_read_unlock();
1993         return len;
1994 
1995 out_unlock:
1996         rcu_read_unlock();
1997 out_free:
1998         kfree_skb(skb);
1999         return err;
2000 }
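
     /* Usage sketch (userspace, not part of this file): the legacy
      * SOCK_PACKET path above is driven by sendto() with a sockaddr_pkt
      * naming the output device ("eth0" here is just an example):
      *
      *   struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };
      *   strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
      *   spkt.spkt_protocol = htons(ETH_P_ALL);
      *   sendto(fd, frame, frame_len, 0,
      *          (struct sockaddr *)&spkt, sizeof(spkt));
      */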
2001 
2002 static unsigned int run_filter(struct sk_buff *skb,
2003                                const struct sock *sk,
2004                                unsigned int res)
2005 {
2006         struct sk_filter *filter;
2007 
2008         rcu_read_lock();
2009         filter = rcu_dereference(sk->sk_filter);
2010         if (filter != NULL)
2011                 res = bpf_prog_run_clear_cb(filter->prog, skb);
2012         rcu_read_unlock();
2013 
2014         return res;
2015 }
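
     /* A return of 0 from run_filter() means the attached (c)BPF program
      * rejected the packet; any other value is the number of bytes it
      * wants captured, which callers use to clamp snaplen.
      */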
2016 
2017 static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2018                            size_t *len)
2019 {
2020         struct virtio_net_hdr vnet_hdr;
2021 
2022         if (*len < sizeof(vnet_hdr))
2023                 return -EINVAL;
2024         *len -= sizeof(vnet_hdr);
2025 
2026         if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
2027                 return -EINVAL;
2028 
2029         return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2030 }
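
     /* When the PACKET_VNET_HDR option is set, packet_rcv_vnet() prepends
      * a struct virtio_net_hdr describing offload state (GSO type, csum
      * placement) to the data returned by recvmsg().
      */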
2031 
2032 
2033 /*
2034  * This function makes lazy skb cloning in hope that most of packets
2035  * are discarded by BPF.
2036  *
2037  * Note tricky part: we DO mangle shared skb! skb->data, skb->len
2038  * and skb->cb are mangled. It works because (and until) packets
2039  * falling here are owned by current CPU. Output packets are cloned
2040  * by dev_queue_xmit_nit(), input packets are processed by net_bh
2041  * sequence, so that if we return skb to original state on exit,
2042  * we will not harm anyone.
2043  */
2044 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
2045                       struct packet_type *pt, struct net_device *orig_dev)
2046 {
2047         struct sock *sk;
2048         struct sockaddr_ll *sll;
2049         struct packet_sock *po;
2050         u8 *skb_head = skb->data;
2051         int skb_len = skb->len;
2052         unsigned int snaplen, res;
2053         bool is_drop_n_account = false;
2054 
2055         if (skb->pkt_type == PACKET_LOOPBACK)
2056                 goto drop;
2057 
2058         sk = pt->af_packet_priv;
2059         po = pkt_sk(sk);
2060 
2061         if (!net_eq(dev_net(dev), sock_net(sk)))
2062                 goto drop;
2063 
2064         skb->dev = dev;
2065 
2066         if (dev->header_ops) {
2067                 /* The device has an explicit notion of ll header,
2068                  * exported to higher levels.
2069                  *
2070                  * Otherwise, the device hides details of its frame
2071                  * structure, so that corresponding packet head is
2072                  * never delivered to user.
2073                  */
2074                 if (sk->sk_type != SOCK_DGRAM)
2075                         skb_push(skb, skb->data - skb_mac_header(skb));
2076                 else if (skb->pkt_type == PACKET_OUTGOING) {
2077                         /* Special case: outgoing packets have ll header at head */
2078                         skb_pull(skb, skb_network_offset(skb));
2079                 }
2080         }
2081 
2082         snaplen = skb->len;
2083 
2084         res = run_filter(skb, sk, snaplen);
2085         if (!res)
2086                 goto drop_n_restore;
2087         if (snaplen > res)
2088                 snaplen = res;
2089 
2090         if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2091                 goto drop_n_acct;
2092 
2093         if (skb_shared(skb)) {
2094                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
2095                 if (nskb == NULL)
2096                         goto drop_n_acct;
2097 
2098                 if (skb_head != skb->data) {
2099                         skb->data = skb_head;
2100                         skb->len = skb_len;
2101                 }
2102                 consume_skb(skb);
2103                 skb = nskb;
2104         }
2105 
2106         sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
2107 
2108         sll = &PACKET_SKB_CB(skb)->sa.ll;
2109         sll->sll_hatype = dev->type;
2110         sll->sll_pkttype = skb->pkt_type;
2111         if (unlikely(po->origdev))
2112                 sll->sll_ifindex = orig_dev->ifindex;
2113         else
2114                 sll->sll_ifindex = dev->ifindex;
2115 
2116         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2117 
2118         /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
2119          * Use their space for storing the original skb length.
2120          */
2121         PACKET_SKB_CB(skb)->sa.origlen = skb->len;
2122 
2123         if (pskb_trim(skb, snaplen))
2124                 goto drop_n_acct;
2125 
2126         skb_set_owner_r(skb, sk);
2127         skb->dev = NULL;
2128         skb_dst_drop(skb);
2129 
2130         /* drop conntrack reference */
2131         nf_reset_ct(skb);
2132 
2133         spin_lock(&sk->sk_receive_queue.lock);
2134         po->stats.stats1.tp_packets++;
2135         sock_skb_set_dropcount(sk, skb);
2136         __skb_queue_tail(&sk->sk_receive_queue, skb);
2137         spin_unlock(&sk->sk_receive_queue.lock);
2138         sk->sk_data_ready(sk);
2139         return 0;
2140 
2141 drop_n_acct:
2142         is_drop_n_account = true;
2143         atomic_inc(&po->tp_drops);
2144         atomic_inc(&sk->sk_drops);
2145 
2146 drop_n_restore:
2147         if (skb_head != skb->data && skb_shared(skb)) {
2148                 skb->data = skb_head;
2149                 skb->len = skb_len;
2150         }
2151 drop:
2152         if (!is_drop_n_account)
2153                 consume_skb(skb);
2154         else
2155                 kfree_skb(skb);
2156         return 0;
2157 }
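
     /* The consume_skb()/kfree_skb() split above keeps drop monitoring
      * accurate: only packets counted in tp_drops/sk_drops are freed via
      * kfree_skb(); filtered or looped-back packets are merely consumed.
      */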
2158 
2159 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2160                        struct packet_type *pt, struct net_device *orig_dev)
2161 {
2162         struct sock *sk;
2163         struct packet_sock *po;
2164         struct sockaddr_ll *sll;
2165         union tpacket_uhdr h;
2166         u8 *skb_head = skb->data;
2167         int skb_len = skb->len;
2168         unsigned int snaplen, res;
2169         unsigned long status = TP_STATUS_USER;
2170         unsigned short macoff, netoff, hdrlen;
2171         struct sk_buff *copy_skb = NULL;
2172         struct timespec ts;
2173         __u32 ts_status;
2174         bool is_drop_n_account = false;
2175         unsigned int slot_id = 0;
2176         bool do_vnet = false;
2177 
2178         /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
2179          * We may add members to them until current aligned size without forcing
2180          * userspace to call getsockopt(..., PACKET_HDRLEN, ...).
2181          */
2182         BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
2183         BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);
2184 
2185         if (skb->pkt_type == PACKET_LOOPBACK)
2186                 goto drop;
2187 
2188         sk = pt->af_packet_priv;
2189         po = pkt_sk(sk);
2190 
2191         if (!net_eq(dev_net(dev), sock_net(sk)))
2192                 goto drop;
2193 
2194         if (dev->header_ops) {
2195                 if (sk->sk_type != SOCK_DGRAM)
2196                         skb_push(skb, skb->data - skb_mac_header(skb));
2197                 else if (skb->pkt_type == PACKET_OUTGOING) {
2198                         /* Special case: outgoing packets have ll header at head */
2199                         skb_pull(skb, skb_network_offset(skb));
2200                 }
2201         }
2202 
2203         snaplen = skb->len;
2204 
2205         res = run_filter(skb, sk, snaplen);
2206         if (!res)
2207                 goto drop_n_restore;
2208 
2209         /* If we are flooded, just give up */
2210         if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
2211                 atomic_inc(&po->tp_drops);
2212                 goto drop_n_restore;
2213         }
2214 
2215         if (skb->ip_summed == CHECKSUM_PARTIAL)
2216                 status |= TP_STATUS_CSUMNOTREADY;
2217         else if (skb->pkt_type != PACKET_OUTGOING &&
2218                  (skb->ip_summed == CHECKSUM_COMPLETE ||
2219                   skb_csum_unnecessary(skb)))
2220                 status |= TP_STATUS_CSUM_VALID;
2221 
2222         if (snaplen > res)
2223                 snaplen = res;
2224 
2225         if (sk->sk_type == SOCK_DGRAM) {
2226                 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
2227                                   po->tp_reserve;
2228         } else {
2229                 unsigned int maclen = skb_network_offset(skb);
2230                 netoff = TPACKET_ALIGN(po->tp_hdrlen +
2231                                        (maclen < 16 ? 16 : maclen)) +
2232                                        po->tp_reserve;
2233                 if (po->has_vnet_hdr) {
2234                         netoff += sizeof(struct virtio_net_hdr);
2235                         do_vnet = true;
2236                 }
2237                 macoff = netoff - maclen;
2238         }
2239         if (po->tp_version <= TPACKET_V2) {
2240                 if (macoff + snaplen > po->rx_ring.frame_size) {
2241                         if (po->copy_thresh &&
2242                             atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
2243                                 if (skb_shared(skb)) {
2244                                         copy_skb = skb_clone(skb, GFP_ATOMIC);
2245                                 } else {
2246                                         copy_skb = skb_get(skb);
2247                                         skb_head = skb->data;
2248                                 }
2249                                 if (copy_skb)
2250                                         skb_set_owner_r(copy_skb, sk);
2251                         }
2252                         snaplen = po->rx_ring.frame_size - macoff;
2253                         if ((int)snaplen < 0) {
2254                                 snaplen = 0;
2255                                 do_vnet = false;
2256                         }
2257                 }
2258         } else if (unlikely(macoff + snaplen >
2259                             GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
2260                 u32 nval;
2261 
2262                 nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
2263                 pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
2264                             snaplen, nval, macoff);
2265                 snaplen = nval;
2266                 if (unlikely((int)snaplen < 0)) {
2267                         snaplen = 0;
2268                         macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
2269                         do_vnet = false;
2270                 }
2271         }
2272         spin_lock(&sk->sk_receive_queue.lock);
2273         h.raw = packet_current_rx_frame(po, skb,
2274                                         TP_STATUS_KERNEL, (macoff+snaplen));
2275         if (!h.raw)
2276                 goto drop_n_account;
2277 
2278         if (po->tp_version <= TPACKET_V2) {
2279                 slot_id = po->rx_ring.head;
2280                 if (test_bit(slot_id, po->rx_ring.rx_owner_map))
2281                         goto drop_n_account;
2282                 __set_bit(slot_id, po->rx_ring.rx_owner_map);
2283         }
2284 
2285         if (do_vnet &&
2286             virtio_net_hdr_from_skb(skb, h.raw + macoff -
2287                                     sizeof(struct virtio_net_hdr),
2288                                     vio_le(), true, 0))
2289                 goto drop_n_account;
2290 
2291         if (po->tp_version <= TPACKET_V2) {
2292                 packet_increment_rx_head(po, &po->rx_ring);
2293         /*
2294          * LOSING will be reported till you read the stats,
2295          * because it's COR - clear on read.
2296          * Anyways, moving it for V1/V2 only as V3 doesn't need it
2297          * at packet level.
2298          */
2299                 if (atomic_read(&po->tp_drops))
2300                         status |= TP_STATUS_LOSING;
2301         }
2302 
2303         po->stats.stats1.tp_packets++;
2304         if (copy_skb) {
2305                 status |= TP_STATUS_COPY;
2306                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
2307         }
2308         spin_unlock(&sk->sk_receive_queue.lock);
2309 
2310         skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2311 
2312         if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
2313                 getnstimeofday(&ts);
2314 
2315         status |= ts_status;
2316 
2317         switch (po->tp_version) {
2318         case TPACKET_V1:
2319                 h.h1->tp_len = skb->len;
2320                 h.h1->tp_snaplen = snaplen;
2321                 h.h1->tp_mac = macoff;
2322                 h.h1->tp_net = netoff;
2323                 h.h1->tp_sec = ts.tv_sec;
2324                 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
2325                 hdrlen = sizeof(*h.h1);
2326                 break;
2327         case TPACKET_V2:
2328                 h.h2->tp_len = skb->len;
2329                 h.h2->tp_snaplen = snaplen;
2330                 h.h2->tp_mac = macoff;
2331                 h.h2->tp_net = netoff;
2332                 h.h2->tp_sec = ts.tv_sec;
2333                 h.h2->tp_nsec = ts.tv_nsec;
2334                 if (skb_vlan_tag_present(skb)) {
2335                         h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
2336                         h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
2337                         status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
2338                 } else {
2339                         h.h2->tp_vlan_tci = 0;
2340                         h.h2->tp_vlan_tpid = 0;
2341                 }
2342                 memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
2343                 hdrlen = sizeof(*h.h2);
2344                 break;
2345         case TPACKET_V3:
2346                 /* tp_nxt_offset,vlan are already populated above.
2347                  * So DONT clear those fields here
2348                  */
2349                 h.h3->tp_status |= status;
2350                 h.h3->tp_len = skb->len;
2351                 h.h3->tp_snaplen = snaplen;
2352                 h.h3->tp_mac = macoff;
2353                 h.h3->tp_net = netoff;
2354                 h.h3->tp_sec  = ts.tv_sec;
2355                 h.h3->tp_nsec = ts.tv_nsec;
2356                 memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
2357                 hdrlen = sizeof(*h.h3);
2358                 break;
2359         default:
2360                 BUG();
2361         }
2362 
2363         sll = h.raw + TPACKET_ALIGN(hdrlen);
2364         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2365         sll->sll_family = AF_PACKET;
2366         sll->sll_hatype = dev->type;
2367         sll->sll_protocol = skb->protocol;
2368         sll->sll_pkttype = skb->pkt_type;
2369         if (unlikely(po->origdev))
2370                 sll->sll_ifindex = orig_dev->ifindex;
2371         else
2372                 sll->sll_ifindex = dev->ifindex;
2373 
2374         smp_mb();
2375 
2376 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
2377         if (po->tp_version <= TPACKET_V2) {
2378                 u8 *start, *end;
2379 
2380                 end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
2381                                         macoff + snaplen);
2382 
2383                 for (start = h.raw; start < end; start += PAGE_SIZE)
2384                         flush_dcache_page(pgv_to_page(start));
2385         }
2386         smp_wmb();
2387 #endif
2388 
2389         if (po->tp_version <= TPACKET_V2) {
2390                 spin_lock(&sk->sk_receive_queue.lock);
2391                 __packet_set_status(po, h.raw, status);
2392                 __clear_bit(slot_id, po->rx_ring.rx_owner_map);
2393                 spin_unlock(&sk->sk_receive_queue.lock);
2394                 sk->sk_data_ready(sk);
2395         } else {
2396                 prb_clear_blk_fill_status(&po->rx_ring);
2397         }
2398 
2399 drop_n_restore:
2400         if (skb_head != skb->data && skb_shared(skb)) {
2401                 skb->data = skb_head;
2402                 skb->len = skb_len;
2403         }
2404 drop:
2405         if (!is_drop_n_account)
2406                 consume_skb(skb);
2407         else
2408                 kfree_skb(skb);
2409         return 0;
2410 
2411 drop_n_account:
2412         spin_unlock(&sk->sk_receive_queue.lock);
2413         atomic_inc(&po->tp_drops);
2414         is_drop_n_account = true;
2415 
2416         sk->sk_data_ready(sk);
2417         kfree_skb(copy_skb);
2418         goto drop_n_restore;
2419 }
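
     /* For TPACKET_V1/V2, rx_owner_map marks a ring slot as kernel-owned
      * from the moment it is reserved until its status word is published,
      * so a slow writer cannot race with the ring head wrapping around.
      */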
2420 
2421 static void tpacket_destruct_skb(struct sk_buff *skb)
2422 {
2423         struct packet_sock *po = pkt_sk(skb->sk);
2424 
2425         if (likely(po->tx_ring.pg_vec)) {
2426                 void *ph;
2427                 __u32 ts;
2428 
2429                 ph = skb_zcopy_get_nouarg(skb);
2430                 packet_dec_pending(&po->tx_ring);
2431 
2432                 ts = __packet_set_timestamp(po, ph, skb);
2433                 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2434 
2435                 if (!packet_read_pending(&po->tx_ring))
2436                         complete(&po->skb_completion);
2437         }
2438 
2439         sock_wfree(skb);
2440 }
2441 
2442 static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2443 {
2444         if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2445             (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2446              __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2447               __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2448                 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2449                          __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2450                         __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2451 
2452         if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2453                 return -EINVAL;
2454 
2455         return 0;
2456 }
2457 
2458 static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2459                                  struct virtio_net_hdr *vnet_hdr)
2460 {
2461         if (*len < sizeof(*vnet_hdr))
2462                 return -EINVAL;
2463         *len -= sizeof(*vnet_hdr);
2464 
2465         if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2466                 return -EFAULT;
2467 
2468         return __packet_snd_vnet_parse(vnet_hdr, *len);
2469 }
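
     /* With PACKET_VNET_HDR, senders prepend a struct virtio_net_hdr to
      * the payload; the two helpers above pull it from the message and
      * sanity-check hdr_len and the csum offsets before the frame is built.
      */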
2470 
2471 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2472                 void *frame, struct net_device *dev, void *data, int tp_len,
2473                 __be16 proto, unsigned char *addr, int hlen, int copylen,
2474                 const struct sockcm_cookie *sockc)
2475 {
2476         union tpacket_uhdr ph;
2477         int to_write, offset, len, nr_frags, len_max;
2478         struct socket *sock = po->sk.sk_socket;
2479         struct page *page;
2480         int err;
2481 
2482         ph.raw = frame;
2483 
2484         skb->protocol = proto;
2485         skb->dev = dev;
2486         skb->priority = po->sk.sk_priority;
2487         skb->mark = po->sk.sk_mark;
2488         skb->tstamp = sockc->transmit_time;
2489         skb_setup_tx_timestamp(skb, sockc->tsflags);
2490         skb_zcopy_set_nouarg(skb, ph.raw);
2491 
2492         skb_reserve(skb, hlen);
2493         skb_reset_network_header(skb);
2494 
2495         to_write = tp_len;
2496 
2497         if (sock->type == SOCK_DGRAM) {
2498                 err = dev_hard_header(skb, dev, ntohs(proto), addr,
2499                                 NULL, tp_len);
2500                 if (unlikely(err < 0))
2501                         return -EINVAL;
2502         } else if (copylen) {
2503                 int hdrlen = min_t(int, copylen, tp_len);
2504 
2505                 skb_push(skb, dev->hard_header_len);
2506                 skb_put(skb, copylen - dev->hard_header_len);
2507                 err = skb_store_bits(skb, 0, data, hdrlen);
2508                 if (unlikely(err))
2509                         return err;
2510                 if (!dev_validate_header(dev, skb->data, hdrlen))
2511                         return -EINVAL;
2512 
2513                 data += hdrlen;
2514                 to_write -= hdrlen;
2515         }
2516 
2517         offset = offset_in_page(data);
2518         len_max = PAGE_SIZE - offset;
2519         len = ((to_write > len_max) ? len_max : to_write);
2520 
2521         skb->data_len = to_write;
2522         skb->len += to_write;
2523         skb->truesize += to_write;
2524         refcount_add(to_write, &po->sk.sk_wmem_alloc);
2525 
2526         while (likely(to_write)) {
2527                 nr_frags = skb_shinfo(skb)->nr_frags;
2528 
2529                 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2530                         pr_err("Packet exceeds the number of skb frags (%lu)\n",
2531                                MAX_SKB_FRAGS);
2532                         return -EFAULT;
2533                 }
2534 
2535                 page = pgv_to_page(data);
2536                 data += len;
2537                 flush_dcache_page(page);
2538                 get_page(page);
2539                 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2540                 to_write -= len;
2541                 offset = 0;
2542                 len_max = PAGE_SIZE;
2543                 len = ((to_write > len_max) ? len_max : to_write);
2544         }
2545 
2546         packet_parse_headers(skb, sock);
2547 
2548         return tp_len;
2549 }
2550 
2551 static int tpacket_parse_header(struct packet_sock *po, void *frame,
2552                                 int size_max, void **data)
2553 {
2554         union tpacket_uhdr ph;
2555         int tp_len, off;
2556 
2557         ph.raw = frame;
2558 
2559         switch (po->tp_version) {
2560         case TPACKET_V3:
2561                 if (ph.h3->tp_next_offset != 0) {
2562                         pr_warn_once("variable sized slot not supported");
2563                         return -EINVAL;
2564                 }
2565                 tp_len = ph.h3->tp_len;
2566                 break;
2567         case TPACKET_V2:
2568                 tp_len = ph.h2->tp_len;
2569                 break;
2570         default:
2571                 tp_len = ph.h1->tp_len;
2572                 break;
2573         }
2574         if (unlikely(tp_len > size_max)) {
2575                 pr_err("packet size is too big (%d > %d)\n", tp_len, size_max);
2576                 return -EMSGSIZE;
2577         }
2578 
2579         if (unlikely(po->tp_tx_has_off)) {
2580                 int off_min, off_max;
2581 
2582                 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2583                 off_max = po->tx_ring.frame_size - tp_len;
2584                 if (po->sk.sk_type == SOCK_DGRAM) {
2585                         switch (po->tp_version) {
2586                         case TPACKET_V3:
2587                                 off = ph.h3->tp_net;
2588                                 break;
2589                         case TPACKET_V2:
2590                                 off = ph.h2->tp_net;
2591                                 break;
2592                         default:
2593                                 off = ph.h1->tp_net;
2594                                 break;
2595                         }
2596                 } else {
2597                         switch (po->tp_version) {
2598                         case TPACKET_V3:
2599                                 off = ph.h3->tp_mac;
2600                                 break;
2601                         case TPACKET_V2:
2602                                 off = ph.h2->tp_mac;
2603                                 break;
2604                         default:
2605                                 off = ph.h1->tp_mac;
2606                                 break;
2607                         }
2608                 }
2609                 if (unlikely((off < off_min) || (off_max < off)))
2610                         return -EINVAL;
2611         } else {
2612                 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2613         }
2614 
2615         *data = frame + off;
2616         return tp_len;
2617 }
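
     /* tpacket_parse_header() trusts the frame's tp_len and, when the
      * PACKET_TX_HAS_OFF option (po->tp_tx_has_off) is set, a user-chosen
      * data offset, which is range-checked against the slot size above.
      */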
2618 
2619 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2620 {
2621         struct sk_buff *skb = NULL;
2622         struct net_device *dev;
2623         struct virtio_net_hdr *vnet_hdr = NULL;
2624         struct sockcm_cookie sockc;
2625         __be16 proto;
2626         int err, reserve = 0;
2627         void *ph;
2628         DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2629         bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2630         unsigned char *addr = NULL;
2631         int tp_len, size_max;
2632         void *data;
2633         int len_sum = 0;
2634         int status = TP_STATUS_AVAILABLE;
2635         int hlen, tlen, copylen = 0;
2636         long timeo = 0;
2637 
2638         mutex_lock(&po->pg_vec_lock);
2639 
2640         /* packet_sendmsg() check on tx_ring.pg_vec was lockless,
2641          * we need to confirm it under protection of pg_vec_lock.
2642          */
2643         if (unlikely(!po->tx_ring.pg_vec)) {
2644                 err = -EBUSY;
2645                 goto out;
2646         }
2647         if (likely(saddr == NULL)) {
2648                 dev     = packet_cached_dev_get(po);
2649                 proto   = po->num;
2650         } else {
2651                 err = -EINVAL;
2652                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2653                         goto out;
2654                 if (msg->msg_namelen < (saddr->sll_halen
2655                                         + offsetof(struct sockaddr_ll,
2656                                                 sll_addr)))
2657                         goto out;
2658                 proto   = saddr->sll_protocol;
2659                 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2660                 if (po->sk.sk_socket->type == SOCK_DGRAM) {
2661                         if (dev && msg->msg_namelen < dev->addr_len +
2662                                    offsetof(struct sockaddr_ll, sll_addr))
2663                                 goto out_put;
2664                         addr = saddr->sll_addr;
2665                 }
2666         }
2667 
2668         err = -ENXIO;
2669         if (unlikely(dev == NULL))
2670                 goto out;
2671         err = -ENETDOWN;
2672         if (unlikely(!(dev->flags & IFF_UP)))
2673                 goto out_put;
2674 
2675         sockcm_init(&sockc, &po->sk);
2676         if (msg->msg_controllen) {
2677                 err = sock_cmsg_send(&po->sk, msg, &sockc);
2678                 if (unlikely(err))
2679                         goto out_put;
2680         }
2681 
2682         if (po->sk.sk_socket->type == SOCK_RAW)
2683                 reserve = dev->hard_header_len;
2684         size_max = po->tx_ring.frame_size
2685                 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2686 
2687         if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2688                 size_max = dev->mtu + reserve + VLAN_HLEN;
2689 
2690         reinit_completion(&po->skb_completion);
2691 
2692         do {
2693                 ph = packet_current_frame(po, &po->tx_ring,
2694                                           TP_STATUS_SEND_REQUEST);
2695                 if (unlikely(ph == NULL)) {
2696                         if (need_wait && skb) {
2697                                 timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
2698                                 timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
2699                                 if (timeo <= 0) {
2700                                         err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
2701                                         goto out_put;
2702                                 }
2703                         }
2704                         /* check for additional frames */
2705                         continue;
2706                 }
2707 
2708                 skb = NULL;
2709                 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2710                 if (tp_len < 0)
2711                         goto tpacket_error;
2712 
2713                 status = TP_STATUS_SEND_REQUEST;
2714                 hlen = LL_RESERVED_SPACE(dev);
2715                 tlen = dev->needed_tailroom;
2716                 if (po->has_vnet_hdr) {
2717                         vnet_hdr = data;
2718                         data += sizeof(*vnet_hdr);
2719                         tp_len -= sizeof(*vnet_hdr);
2720                         if (tp_len < 0 ||
2721                             __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2722                                 tp_len = -EINVAL;
2723                                 goto tpacket_error;
2724                         }
2725                         copylen = __virtio16_to_cpu(vio_le(),
2726                                                     vnet_hdr->hdr_len);
2727                 }
2728                 copylen = max_t(int, copylen, dev->hard_header_len);
2729                 skb = sock_alloc_send_skb(&po->sk,
2730                                 hlen + tlen + sizeof(struct sockaddr_ll) +
2731                                 (copylen - dev->hard_header_len),
2732                                 !need_wait, &err);
2733 
2734                 if (unlikely(skb == NULL)) {
2735                         /* we assume the socket was initially writeable ... */
2736                         if (likely(len_sum > 0))
2737                                 err = len_sum;
2738                         goto out_status;
2739                 }
2740                 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2741                                           addr, hlen, copylen, &sockc);
2742                 if (likely(tp_len >= 0) &&
2743                     tp_len > dev->mtu + reserve &&
2744                     !po->has_vnet_hdr &&
2745                     !packet_extra_vlan_len_allowed(dev, skb))
2746                         tp_len = -EMSGSIZE;
2747 
2748                 if (unlikely(tp_len < 0)) {
2749 tpacket_error:
2750                         if (po->tp_loss) {
2751                                 __packet_set_status(po, ph,
2752                                                 TP_STATUS_AVAILABLE);
2753                                 packet_increment_head(&po->tx_ring);
2754                                 kfree_skb(skb);
2755                                 continue;
2756                         } else {
2757                                 status = TP_STATUS_WRONG_FORMAT;
2758                                 err = tp_len;
2759                                 goto out_status;
2760                         }
2761                 }
2762 
2763                 if (po->has_vnet_hdr) {
2764                         if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
2765                                 tp_len = -EINVAL;
2766                                 goto tpacket_error;
2767                         }
2768                         virtio_net_hdr_set_proto(skb, vnet_hdr);
2769                 }
2770 
2771                 skb->destructor = tpacket_destruct_skb;
2772                 __packet_set_status(po, ph, TP_STATUS_SENDING);
2773                 packet_inc_pending(&po->tx_ring);
2774 
2775                 status = TP_STATUS_SEND_REQUEST;
2776                 err = po->xmit(skb);
2777                 if (unlikely(err > 0)) {
2778                         err = net_xmit_errno(err);
2779                         if (err && __packet_get_status(po, ph) ==
2780                                    TP_STATUS_AVAILABLE) {
2781                                 /* skb was destructed already */
2782                                 skb = NULL;
2783                                 goto out_status;
2784                         }
2785                         
2786                         /* skb was dropped but not destructed yet;
2787                          * let's treat it like congestion or err < 0
2788                          */
2789                         err = 0;
2790                 }
2791                 packet_increment_head(&po->tx_ring);
2792                 len_sum += tp_len;
2793         } while (likely((ph != NULL) ||
2794                 /* Note: packet_read_pending() might be slow if we have
2795                  * to call it as it's per_cpu variable, but in fast-path
2796                  * we already short-circuit the loop with the first
2797                  * condition, and luckily don't have to go that path
2798                  * anyway.
2799                  */
2800                  (need_wait && packet_read_pending(&po->tx_ring))));
2801 
2802         err = len_sum;
2803         goto out_put;
2804 
2805 out_status:
2806         __packet_set_status(po, ph, status);
2807         kfree_skb(skb);
2808 out_put:
2809         dev_put(dev);
2810 out:
2811         mutex_unlock(&po->pg_vec_lock);
2812         return err;
2813 }
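
     /* Usage sketch (userspace, not part of this file): frames reach
      * tpacket_snd() from a mmap()ed TX ring; userspace fills a slot,
      * flips its status word, then kicks the kernel with send():
      *
      *   hdr->tp_len    = frame_len;
      *   hdr->tp_status = TP_STATUS_SEND_REQUEST;
      *   send(fd, NULL, 0, MSG_DONTWAIT);  (or 0 to wait for completion)
      */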
2814 
2815 static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2816                                         size_t reserve, size_t len,
2817                                         size_t linear, int noblock,
2818                                         int *err)
2819 {
2820         struct sk_buff *skb;
2821 
2822         /* Under a page?  Don't bother with paged skb. */
2823         if (prepad + len < PAGE_SIZE || !linear)
2824                 linear = len;
2825 
2826         skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2827                                    err, 0);
2828         if (!skb)
2829                 return NULL;
2830 
2831         skb_reserve(skb, reserve);
2832         skb_put(skb, linear);
2833         skb->data_len = len - linear;
2834         skb->len += len - linear;
2835 
2836         return skb;
2837 }
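
     /* packet_alloc_skb() keeps sub-page frames fully linear; anything
      * larger gets 'linear' bytes of linear data (at least the link-layer
      * header) and the remainder as paged data.
      */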
2838 
2839 static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2840 {
2841         struct sock *sk = sock->sk;
2842         DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2843         struct sk_buff *skb;
2844         struct net_device *dev;
2845         __be16 proto;
2846         unsigned char *addr = NULL;
2847         int err, reserve = 0;
2848         struct sockcm_cookie sockc;
2849         struct virtio_net_hdr vnet_hdr = { 0 };
2850         int offset = 0;
2851         struct packet_sock *po = pkt_sk(sk);
2852         bool has_vnet_hdr = false;
2853         int hlen, tlen, linear;
2854         int extra_len = 0;
2855 
2856         /*
2857          *      Get and verify the address.
2858          */
2859 
2860         if (likely(saddr == NULL)) {
2861                 dev     = packet_cached_dev_get(po);
2862                 proto   = po->num;
2863         } else {
2864                 err = -EINVAL;
2865                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2866                         goto out;
2867                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2868                         goto out;
2869                 proto   = saddr->sll_protocol;
2870                 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2871                 if (sock->type == SOCK_DGRAM) {
2872                         if (dev && msg->msg_namelen < dev->addr_len +
2873                                    offsetof(struct sockaddr_ll, sll_addr))
2874                                 goto out_unlock;
2875                         addr = saddr->sll_addr;
2876                 }
2877         }
2878 
2879         err = -ENXIO;
2880         if (unlikely(dev == NULL))
2881                 goto out_unlock;
2882         err = -ENETDOWN;
2883         if (unlikely(!(dev->flags & IFF_UP)))
2884                 goto out_unlock;
2885 
2886         sockcm_init(&sockc, sk);
2887         sockc.mark = sk->sk_mark;
2888         if (msg->msg_controllen) {
2889                 err = sock_cmsg_send(sk, msg, &sockc);
2890                 if (unlikely(err))
2891                         goto out_unlock;
2892         }
2893 
2894         if (sock->type == SOCK_RAW)
2895                 reserve = dev->hard_header_len;
2896         if (po->has_vnet_hdr) {
2897                 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2898                 if (err)
2899                         goto out_unlock;
2900                 has_vnet_hdr = true;
2901         }
2902 
2903         if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2904                 if (!netif_supports_nofcs(dev)) {
2905                         err = -EPROTONOSUPPORT;
2906                         goto out_unlock;
2907                 }
2908                 extra_len = 4; /* We're doing our own CRC */
2909         }
2910 
2911         err = -EMSGSIZE;
2912         if (!vnet_hdr.gso_type &&
2913             (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2914                 goto out_unlock;
2915 
2916         err = -ENOBUFS;
2917         hlen = LL_RESERVED_SPACE(dev);
2918         tlen = dev->needed_tailroom;
2919         linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
2920         linear = max(linear, min_t(int, len, dev->hard_header_len));
2921         skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
2922                                msg->msg_flags & MSG_DONTWAIT, &err);
2923         if (skb == NULL)
2924                 goto out_unlock;
2925 
2926         skb_reset_network_header(skb);
2927 
2928         err = -EINVAL;
2929         if (sock->type == SOCK_DGRAM) {
2930                 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2931                 if (unlikely(offset < 0))
2932                         goto out_free;
2933         } else if (reserve) {
2934                 skb_reserve(skb, -reserve);
2935                 if (len < reserve + sizeof(struct ipv6hdr) &&
2936                     dev->min_header_len != dev->hard_header_len)
2937                         skb_reset_network_header(skb);
2938         }
2939 
2940         /* Returns -EFAULT on error */
2941         err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
2942         if (err)
2943                 goto out_free;
2944 
2945         if (sock->type == SOCK_RAW &&
2946             !dev_validate_header(dev, skb->data, len)) {
2947                 err = -EINVAL;
2948                 goto out_free;
2949         }
2950 
2951         skb_setup_tx_timestamp(skb, sockc.tsflags);
2952 
2953         if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2954             !packet_extra_vlan_len_allowed(dev, skb)) {
2955                 err = -EMSGSIZE;
2956                 goto out_free;
2957         }
2958 
2959         skb->protocol = proto;
2960         skb->dev = dev;
2961         skb->priority = sk->sk_priority;
2962         skb->mark = sockc.mark;
2963         skb->tstamp = sockc.transmit_time;
2964 
2965         if (has_vnet_hdr) {
2966                 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
2967                 if (err)
2968                         goto out_free;
2969                 len += sizeof(vnet_hdr);
2970                 virtio_net_hdr_set_proto(skb, &vnet_hdr);
2971         }
2972 
2973         packet_parse_headers(skb, sock);
2974 
2975         if (unlikely(extra_len == 4))
2976                 skb->no_fcs = 1;
2977 
2978         err = po->xmit(skb);
2979         if (err > 0 && (err = net_xmit_errno(err)) != 0)
2980                 goto out_unlock;
2981 
2982         dev_put(dev);
2983 
2984         return len;
2985 
2986 out_free:
2987         kfree_skb(skb);
2988 out_unlock:
2989         if (dev)
2990                 dev_put(dev);
2991 out:
2992         return err;
2993 }
2994 
2995 static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2996 {
2997         struct sock *sk = sock->sk;
2998         struct packet_sock *po = pkt_sk(sk);
2999 
3000         if (po->tx_ring.pg_vec)
3001                 return tpacket_snd(po, msg);
3002         else
3003                 return packet_snd(sock, msg, len);
3004 }
3005 
3006 
3007 /*
3008  *      Close a PACKET socket. This is fairly simple. We immediately go
3009  *      to 'closed' state and remove our protocol entry in the device list.
3010  */
3011 static int packet_release(struct socket *sock)
3012 {
3013         struct sock *sk = sock->sk;
3014         struct packet_sock *po;
3015         struct packet_fanout *f;
3016         struct net *net;
3017         union tpacket_req_u req_u;
3018 
3019         if (!sk)
3020                 return 0;
3021 
3022         net = sock_net(sk);
3023         po = pkt_sk(sk);
3024 
3025         mutex_lock(&net->packet.sklist_lock);
3026         sk_del_node_init_rcu(sk);
3027         mutex_unlock(&net->packet.sklist_lock);
3028 
3029         preempt_disable();
3030         sock_prot_inuse_add(net, sk->sk_prot, -1);
3031         preempt_enable();
3032 
3033         spin_lock(&po->bind_lock);
3034         unregister_prot_hook(sk, false);
3035         packet_cached_dev_reset(po);
3036 
3037         if (po->prot_hook.dev) {
3038                 dev_put(po->prot_hook.dev);
3039                 po->prot_hook.dev = NULL;
3040         }
3041         spin_unlock(&po->bind_lock);
3042 
3043         packet_flush_mclist(sk);
3044 
3045         lock_sock(sk);
3046         if (po->rx_ring.pg_vec) {
3047                 memset(&req_u, 0, sizeof(req_u));
3048                 packet_set_ring(sk, &req_u, 1, 0);
3049         }
3050 
3051         if (po->tx_ring.pg_vec) {
3052                 memset(&req_u, 0, sizeof(req_u));
3053                 packet_set_ring(sk, &req_u, 1, 1);
3054         }
3055         release_sock(sk);
3056 
3057         f = fanout_release(sk);
3058 
3059         synchronize_net();
3060 
3061         kfree(po->rollover);
3062         if (f) {
3063                 fanout_release_data(f);
3064                 kfree(f);
3065         }
3066         /*
3067          *      Now the socket is dead. No more input will appear.
3068          */
3069         sock_orphan(sk);
3070         sock->sk = NULL;
3071 
3072         /* Purge queues */
3073 
3074         skb_queue_purge(&sk->sk_receive_queue);
3075         packet_free_pending(po);
3076         sk_refcnt_debug_release(sk);
3077 
3078         sock_put(sk);
3079         return 0;
3080 }
3081 
3082 
3083 /*
3084  *      Attach a packet hook.
3085  */
3086 static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3087                           __be16 proto)
3088 {
3089         struct packet_sock *po = pkt_sk(sk);
3090         struct net_device *dev_curr;
3091         __be16 proto_curr;
3092         bool need_rehook;
3093         struct net_device *dev = NULL;
3094         int ret = 0;
3095         bool unlisted = false;
3096 
3097         lock_sock(sk);
3098         spin_lock(&po->bind_lock);
3099         rcu_read_lock();
3100 
3101         if (po->fanout) {
3102                 ret = -EINVAL;
3103                 goto out_unlock;
3104         }
3105 
3106         if (name) {
3107                 dev = dev_get_by_name_rcu(sock_net(sk), name);
3108                 if (!dev) {
3109                         ret = -ENODEV;
3110                         goto out_unlock;
3111                 }
3112         } else if (ifindex) {
3113                 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3114                 if (!dev) {
3115                         ret = -ENODEV;
3116                         goto out_unlock;
3117                 }
3118         }
3119 
3120         if (dev)
3121                 dev_hold(dev);
3122 
3123         proto_curr = po->prot_hook.type;
3124         dev_curr = po->prot_hook.dev;
3125 
3126         need_rehook = proto_curr != proto || dev_curr != dev;
3127 
3128         if (need_rehook) {
3129                 if (po->running) {
3130                         rcu_read_unlock();
3131                         /* __unregister_prot_hook() can sleep (synchronize_net),
3132                          * so leave the RCU read section; the device is rechecked after.
3133                          */
3134                         po->num = 0;
3135                         __unregister_prot_hook(sk, true);
3136                         rcu_read_lock();
3137                         dev_curr = po->prot_hook.dev;
3138                         if (dev)
3139                                 unlisted = !dev_get_by_index_rcu(sock_net(sk),
3140                                                                  dev->ifindex);
3141                 }
3142 
3143                 BUG_ON(po->running);
3144                 po->num = proto;
3145                 po->prot_hook.type = proto;
3146 
3147                 if (unlikely(unlisted)) {
3148                         dev_put(dev);
3149                         po->prot_hook.dev = NULL;
3150                         po->ifindex = -1;
3151                         packet_cached_dev_reset(po);
3152                 } else {
3153                         po->prot_hook.dev = dev;
3154                         po->ifindex = dev ? dev->ifindex : 0;
3155                         packet_cached_dev_assign(po, dev);
3156                 }
3157         }
3158         if (dev_curr)
3159                 dev_put(dev_curr);
3160 
3161         if (proto == 0 || !need_rehook)
3162                 goto out_unlock;
3163 
3164         if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
3165                 register_prot_hook(sk);
3166         } else {
3167                 sk->sk_err = ENETDOWN;
3168                 if (!sock_flag(sk, SOCK_DEAD))
3169                         sk->sk_error_report(sk);
3170         }
3171 
3172 out_unlock:
3173         rcu_read_unlock();
3174         spin_unlock(&po->bind_lock);
3175         release_sock(sk);
3176         return ret;
3177 }
3178 
3179 
3180 /*
3181  *      Bind a packet socket to a device
3182  */
3183 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3184                             int addr_len)
3185 {
3186         struct sock *sk = sock->sk;
3187         char name[sizeof(uaddr->sa_data) + 1];
3188 
3189         /*
3190          *      Check legality
3191          */
3192 
3193         if (addr_len != sizeof(struct sockaddr))
3194                 return -EINVAL;
3195         /* uaddr->sa_data comes from the userspace, it's not guaranteed to be
3196          * zero-terminated.
3197          */
3198         memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3199         name[sizeof(uaddr->sa_data)] = 0;
3200 
3201         return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3202 }
3203 
3204 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3205 {
3206         struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
3207         struct sock *sk = sock->sk;
3208 
3209         /*
3210          *      Check legality
3211          */
3212 
3213         if (addr_len < sizeof(struct sockaddr_ll))
3214                 return -EINVAL;
3215         if (sll->sll_family != AF_PACKET)
3216                 return -EINVAL;
3217 
3218         return packet_do_bind(sk, NULL, sll->sll_ifindex,
3219                               sll->sll_protocol ? : pkt_sk(sk)->num);
3220 }
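
     /* Usage sketch (userspace, not part of this file): binding an
      * AF_PACKET socket to one interface goes through packet_bind():
      *
      *   struct sockaddr_ll sll = {
      *           .sll_family   = AF_PACKET,
      *           .sll_protocol = htons(ETH_P_ALL),
      *           .sll_ifindex  = if_nametoindex("eth0"),
      *   };
      *   bind(fd, (struct sockaddr *)&sll, sizeof(sll));
      */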
3221 
3222 static struct proto packet_proto = {
3223         .name     = "PACKET",
3224         .owner    = THIS_MODULE,
3225         .obj_size = sizeof(struct packet_sock),
3226 };
3227 
3228 
3229 /*
3230  *      Create a packet of type SOCK_PACKET.
3231  */
3232 static int packet_create(struct net *net, struct socket *sock, int protocol,
3233                          int kern)
3234 {
3235         struct sock *sk;
3236         struct packet_sock *po;
3237         __be16 proto = (__force __be16)protocol; /* weird, but documented */
3238         int err;
3239 
3240         if (!ns_capable(net->user_ns, CAP_NET_RAW))
3241                 return -EPERM;
3242         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
3243             sock->type != SOCK_PACKET)
3244                 return -ESOCKTNOSUPPORT;
3245 
3246         sock->state = SS_UNCONNECTED;
3247 
3248         err = -ENOBUFS;
3249         sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
3250         if (sk == NULL)
3251                 goto out;
3252 
3253         sock->ops = &packet_ops;
3254         if (sock->type == SOCK_PACKET)
3255                 sock->ops = &packet_ops_spkt;
3256 
3257         sock_init_data(sock, sk);
3258 
3259         po = pkt_sk(sk);
3260         init_completion(&po->skb_completion);
3261         sk->sk_family = PF_PACKET;
3262         po->num = proto;
3263         po->xmit = dev_queue_xmit;
3264 
3265         err = packet_alloc_pending(po);
3266         if (err)
3267                 goto out2;
3268 
3269         packet_cached_dev_reset(po);
3270 
3271         sk->sk_destruct = packet_sock_destruct;
3272         sk_refcnt_debug_inc(sk);
3273 
3274         /*
3275          *      Attach a protocol block
3276          */
3277 
3278         spin_lock_init(&po->bind_lock);
3279         mutex_init(&po->pg_vec_lock);
3280         po->rollover = NULL;
3281         po->prot_hook.func = packet_rcv;
3282 
3283         if (sock->type == SOCK_PACKET)
3284                 po->prot_hook.func = packet_rcv_spkt;
3285 
3286         po->prot_hook.af_packet_priv = sk;
3287 
3288         if (proto) {
3289                 po->prot_hook.type = proto;
3290                 __register_prot_hook(sk);
3291         }
3292 
3293         mutex_lock(&net->packet.sklist_lock);
3294         sk_add_node_tail_rcu(sk, &net->packet.sklist);
3295         mutex_unlock(&net->packet.sklist_lock);
3296 
3297         preempt_disable();
3298         sock_prot_inuse_add(net, &packet_proto, 1);
3299         preempt_enable();
3300 
3301         return 0;
3302 out2:
3303         sk_free(sk);
3304 out:
3305         return err;
3306 }
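
     /* Note: a protocol argument of 0 creates the socket without
      * registering a receive hook; delivery starts only once a later
      * bind() supplies a protocol via packet_do_bind().
      */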
3307 
3308 
3309 /*
3310  *      Pull a packet from our receive queue and hand it to the user.
3311  *      If necessary we block.
3312  */
3313 static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3314                           int flags)
3315 {
3316         struct sock *sk = sock->sk;
3317         struct sk_buff *skb;
3318         int copied, err;
3319         int vnet_hdr_len = 0;
3320         unsigned int origlen = 0;
3321 
3322         err = -EINVAL;
3323         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
3324                 goto out;
3325 
3326 #if 0
3327         /* What error should we return now? EUNATTACH? */
3328         if (pkt_sk(sk)->ifindex < 0)
3329                 return -ENODEV;
3330 #endif
3331 
3332         if (flags & MSG_ERRQUEUE) {
3333                 err = sock_recv_errqueue(sk, msg, len,
3334                                          SOL_PACKET, PACKET_TX_TIMESTAMP);
3335                 goto out;
3336         }
3337 
3338         /*
3339          *      Call the generic datagram receiver. This handles all sorts
3340          *      of horrible races and re-entrancy so we can forget about it
3341          *      in the protocol layers.
3342          *
3343          *      Now it will return ENETDOWN, if device have just gone down,
3344          *      but then it will block.
3345          */
3346 
3347         skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
3348 
3349         /*
3350          *      An error occurred so return it. Because skb_recv_datagram()
3351          *      handles the blocking we don't see and worry about blocking
3352          *      retries.
3353          */
3354 
3355         if (skb == NULL)
3356                 goto out;
3357 
3358         packet_rcv_try_clear_pressure(pkt_sk(sk));
3359 
3360         if (pkt_sk(sk)->has_vnet_hdr) {
3361                 err = packet_rcv_vnet(msg, skb, &len);
3362                 if (err)
3363                         goto out_free;
3364                 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3365         }
3366 
3367         /*
3368          *      You lose any data beyond the buffer you gave. If it worries
3369          *      a user program they can ask the device for its MTU anyway.
3370          */
3371         copied = skb->len;
3372         if (copied > len) {
3373                 copied = len;
3374                 msg->msg_flags |= MSG_TRUNC;
3375         }
3376 
3377         err = skb_copy_datagram_msg(skb, 0, msg, copied);
3378         if (err)
3379                 goto out_free;
3380 
3381         if (sock->type != SOCK_PACKET) {
3382                 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3383 
3384                 /* Original length was stored in sockaddr_ll fields */
3385                 origlen = PACKET_SKB_CB(skb)->sa.origlen;
3386                 sll->sll_family = AF_PACKET;
3387                 sll->sll_protocol = skb->protocol;
3388         }
3389 
3390         sock_recv_ts_and_drops(msg, sk, skb);
3391 
3392         if (msg->msg_name) {
3393                 int copy_len;
3394 
3395                 /* If the address length field is there to be filled
3396                  * in, we fill it in now.
3397                  */
3398                 if (sock->type == SOCK_PACKET) {
3399                         __sockaddr_check_size(sizeof(struct sockaddr_pkt));
3400                         msg->msg_namelen = sizeof(struct sockaddr_pkt);
3401                         copy_len = msg->msg_namelen;
3402                 } else {
3403                         struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3404 
3405                         msg->msg_namelen = sll->sll_halen +
3406                                 offsetof(struct sockaddr_ll, sll_addr);
3407                         copy_len = msg->msg_namelen;
3408                         if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
3409                                 memset(msg->msg_name +
3410                                        offsetof(struct sockaddr_ll, sll_addr),
3411                                        0, sizeof(sll->sll_addr));
3412                                 msg->msg_namelen = sizeof(struct sockaddr_ll);
3413                         }
3414                 }
3415                 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
3416         }
3417 
3418         if (pkt_sk(sk)->auxdata) {
3419                 struct tpacket_auxdata aux;
3420 
3421                 aux.tp_status = TP_STATUS_USER;
3422                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3423                         aux.tp_status |= TP_STATUS_CSUMNOTREADY;
3424                 else if (skb->pkt_type != PACKET_OUTGOING &&
3425                          (skb->ip_summed == CHECKSUM_COMPLETE ||
3426                           skb_csum_unnecessary(skb)))
3427                         aux.tp_status |= TP_STATUS_CSUM_VALID;
3428 
3429                 aux.tp_len = origlen;
3430                 aux.tp_snaplen = skb->len;
3431                 aux.tp_mac = 0;
3432                 aux.tp_net = skb_network_offset(skb);
3433                 if (skb_vlan_tag_present(skb)) {
3434                         aux.tp_vlan_tci = skb_vlan_tag_get(skb);
3435                         aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
3436                         aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
3437                 } else {
3438                         aux.tp_vlan_tci = 0;
3439                         aux.tp_vlan_tpid = 0;
3440                 }
3441                 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
3442         }
3443 
3444         /*
3445          * Free or return the buffer as appropriate. Again this
3446          * hides all the races and re-entrancy issues from us.
3447          */
3448         err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
3449 
3450 out_free:
3451         skb_free_datagram(sk, skb);
3452 out:
3453         return err;
3454 }
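
/*
 * Illustrative userspace sketch (not part of this file; fd is assumed
 * to be an AF_PACKET socket with PACKET_AUXDATA enabled, headers
 * <sys/socket.h> and <linux/if_packet.h> assumed, process() is a
 * placeholder): walking the control messages for the struct
 * tpacket_auxdata that put_cmsg() emits above.
 *
 *	char frame[2048];
 *	char ctl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { .iov_base = frame, .iov_len = sizeof(frame) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = ctl, .msg_controllen = sizeof(ctl),
 *	};
 *	struct cmsghdr *c;
 *
 *	if (recvmsg(fd, &msg, 0) < 0)
 *		return;
 *	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
 *		if (c->cmsg_level == SOL_PACKET &&
 *		    c->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux = (void *)CMSG_DATA(c);
 *			process(aux);	(tp_vlan_tci is meaningful only
 *					 when TP_STATUS_VLAN_VALID is set
 *					 in aux->tp_status)
 *		}
 */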
3455 
3456 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
3457                                int peer)
3458 {
3459         struct net_device *dev;
3460         struct sock *sk = sock->sk;
3461 
3462         if (peer)
3463                 return -EOPNOTSUPP;
3464 
3465         uaddr->sa_family = AF_PACKET;
3466         memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3467         rcu_read_lock();
3468         dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
3469         if (dev)
3470                 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3471         rcu_read_unlock();
3472 
3473         return sizeof(*uaddr);
3474 }
3475 
3476 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3477                           int peer)
3478 {
3479         struct net_device *dev;
3480         struct sock *sk = sock->sk;
3481         struct packet_sock *po = pkt_sk(sk);
3482         DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3483 
3484         if (peer)
3485                 return -EOPNOTSUPP;
3486 
3487         sll->sll_family = AF_PACKET;
3488         sll->sll_ifindex = po->ifindex;
3489         sll->sll_protocol = po->num;
3490         sll->sll_pkttype = 0;
3491         rcu_read_lock();
3492         dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
3493         if (dev) {
3494                 sll->sll_hatype = dev->type;
3495                 sll->sll_halen = dev->addr_len;
3496                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3497         } else {
3498                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
3499                 sll->sll_halen = 0;
3500         }
3501         rcu_read_unlock();
3502 
3503         return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3504 }
3505 
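/*
 * Translate one packet_mclist entry into the matching device call.
 * 'what' is a signed delta: +1 applies the entry, -1 reverts it.
 * dev_set_promiscuity() and dev_set_allmulti() are refcounted and take
 * the delta directly, which is why those cases pass 'what' through.
 */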
3506 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3507                          int what)
3508 {
3509         switch (i->type) {
3510         case PACKET_MR_MULTICAST:
3511                 if (i->alen != dev->addr_len)
3512                         return -EINVAL;
3513                 if (what > 0)
3514                         return dev_mc_add(dev, i->addr);
3515                 else
3516                         return dev_mc_del(dev, i->addr);
3517                 break;
3518         case PACKET_MR_PROMISC:
3519                 return dev_set_promiscuity(dev, what);
3520         case PACKET_MR_ALLMULTI:
3521                 return dev_set_allmulti(dev, what);
3522         case PACKET_MR_UNICAST:
3523                 if (i->alen != dev->addr_len)
3524                         return -EINVAL;
3525                 if (what > 0)
3526                         return dev_uc_add(dev, i->addr);
3527                 else
3528                         return dev_uc_del(dev, i->addr);
3529                 break;
3530         default:
3531                 break;
3532         }
3533         return 0;
3534 }
3535 
3536 static void packet_dev_mclist_delete(struct net_device *dev,
3537                                      struct packet_mclist **mlp)
3538 {
3539         struct packet_mclist *ml;
3540 
3541         while ((ml = *mlp) != NULL) {
3542                 if (ml->ifindex == dev->ifindex) {
3543                         packet_dev_mc(dev, ml, -1);
3544                         *mlp = ml->next;
3545                         kfree(ml);
3546                 } else
3547                         mlp = &ml->next;
3548         }
3549 }
3550 
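/*
 * po->mclist entries are refcounted per (ifindex, type, alen, address)
 * tuple: re-adding an existing entry only bumps ml->count, and
 * packet_mc_drop() below touches the device again only once the count
 * drops to zero.
 */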
3551 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3552 {
3553         struct packet_sock *po = pkt_sk(sk);
3554         struct packet_mclist *ml, *i;
3555         struct net_device *dev;
3556         int err;
3557 
3558         rtnl_lock();
3559 
3560         err = -ENODEV;
3561         dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3562         if (!dev)
3563                 goto done;
3564 
3565         err = -EINVAL;
3566         if (mreq->mr_alen > dev->addr_len)
3567                 goto done;
3568 
3569         err = -ENOBUFS;
3570         i = kmalloc(sizeof(*i), GFP_KERNEL);
3571         if (i == NULL)
3572                 goto done;
3573 
3574         err = 0;
3575         for (ml = po->mclist; ml; ml = ml->next) {
3576                 if (ml->ifindex == mreq->mr_ifindex &&
3577                     ml->type == mreq->mr_type &&
3578                     ml->alen == mreq->mr_alen &&
3579                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3580                         ml->count++;
3581                         /* Already present: drop the duplicate we allocated */
3582                         kfree(i);
3583                         goto done;
3584                 }
3585         }
3586 
3587         i->type = mreq->mr_type;
3588         i->ifindex = mreq->mr_ifindex;
3589         i->alen = mreq->mr_alen;
3590         memcpy(i->addr, mreq->mr_address, i->alen);
3591         memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3592         i->count = 1;
3593         i->next = po->mclist;
3594         po->mclist = i;
3595         err = packet_dev_mc(dev, i, 1);
3596         if (err) {
3597                 po->mclist = i->next;
3598                 kfree(i);
3599         }
3600 
3601 done:
3602         rtnl_unlock();
3603         return err;
3604 }
3605 
3606 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3607 {
3608         struct packet_mclist *ml, **mlp;
3609 
3610         rtnl_lock();
3611 
3612         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3613                 if (ml->ifindex == mreq->mr_ifindex &&
3614                     ml->type == mreq->mr_type &&
3615                     ml->alen == mreq->mr_alen &&
3616                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3617                         if (--ml->count == 0) {
3618                                 struct net_device *dev;
3619                                 *mlp = ml->next;
3620                                 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3621                                 if (dev)
3622                                         packet_dev_mc(dev, ml, -1);
3623                                 kfree(ml);
3624                         }
3625                         break;
3626                 }
3627         }
3628         rtnl_unlock();
3629         return 0;
3630 }
3631 
3632 static void packet_flush_mclist(struct sock *sk)
3633 {
3634         struct packet_sock *po = pkt_sk(sk);
3635         struct packet_mclist *ml;
3636 
3637         if (!po->mclist)
3638                 return;
3639 
3640         rtnl_lock();
3641         while ((ml = po->mclist) != NULL) {
3642                 struct net_device *dev;
3643 
3644                 po->mclist = ml->next;
3645                 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3646                 if (dev != NULL)
3647                         packet_dev_mc(dev, ml, -1);
3648                 kfree(ml);
3649         }
3650         rtnl_unlock();
3651 }
3652 
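/*
 * All options are SOL_PACKET. Options that change ring geometry or
 * framing (PACKET_VERSION, PACKET_RESERVE, PACKET_LOSS,
 * PACKET_VNET_HDR, PACKET_TX_HAS_OFF) return -EBUSY once either ring
 * has pages attached, so they must be configured before the ring.
 */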
3653 static int
3654 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3655 {
3656         struct sock *sk = sock->sk;
3657         struct packet_sock *po = pkt_sk(sk);
3658         int ret;
3659 
3660         if (level != SOL_PACKET)
3661                 return -ENOPROTOOPT;
3662 
3663         switch (optname) {
3664         case PACKET_ADD_MEMBERSHIP:
3665         case PACKET_DROP_MEMBERSHIP:
3666         {
3667                 struct packet_mreq_max mreq;
3668                 int len = optlen;
3669                 memset(&mreq, 0, sizeof(mreq));
3670                 if (len < sizeof(struct packet_mreq))
3671                         return -EINVAL;
3672                 if (len > sizeof(mreq))
3673                         len = sizeof(mreq);
3674                 if (copy_from_user(&mreq, optval, len))
3675                         return -EFAULT;
3676                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3677                         return -EINVAL;
3678                 if (optname == PACKET_ADD_MEMBERSHIP)
3679                         ret = packet_mc_add(sk, &mreq);
3680                 else
3681                         ret = packet_mc_drop(sk, &mreq);
3682                 return ret;
3683         }
3684 
3685         case PACKET_RX_RING:
3686         case PACKET_TX_RING:
3687         {
3688                 union tpacket_req_u req_u;
3689                 int len;
3690 
3691                 lock_sock(sk);
3692                 switch (po->tp_version) {
3693                 case TPACKET_V1:
3694                 case TPACKET_V2:
3695                         len = sizeof(req_u.req);
3696                         break;
3697                 case TPACKET_V3:
3698                 default:
3699                         len = sizeof(req_u.req3);
3700                         break;
3701                 }
3702                 if (optlen < len) {
3703                         ret = -EINVAL;
3704                 } else {
3705                         if (copy_from_user(&req_u.req, optval, len))
3706                                 ret = -EFAULT;
3707                         else
3708                                 ret = packet_set_ring(sk, &req_u, 0,
3709                                                     optname == PACKET_TX_RING);
3710                 }
3711                 release_sock(sk);
3712                 return ret;
3713         }
3714         case PACKET_COPY_THRESH:
3715         {
3716                 int val;
3717 
3718                 if (optlen != sizeof(val))
3719                         return -EINVAL;
3720                 if (copy_from_user(&val, optval, sizeof(val)))
3721                         return -EFAULT;
3722 
3723                 pkt_sk(sk)->copy_thresh = val;
3724                 return 0;
3725         }
3726         case PACKET_VERSION:
3727         {
3728                 int val;
3729 
3730                 if (optlen != sizeof(val))
3731                         return -EINVAL;
3732                 if (copy_from_user(&val, optval, sizeof(val)))
3733                         return -EFAULT;
3734                 switch (val) {
3735                 case TPACKET_V1:
3736                 case TPACKET_V2:
3737                 case TPACKET_V3:
3738                         break;
3739                 default:
3740                         return -EINVAL;
3741                 }
3742                 lock_sock(sk);
3743                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3744                         ret = -EBUSY;
3745                 } else {
3746                         po->tp_version = val;
3747                         ret = 0;
3748                 }
3749                 release_sock(sk);
3750                 return ret;
3751         }
3752         case PACKET_RESERVE:
3753         {
3754                 unsigned int val;
3755 
3756                 if (optlen != sizeof(val))
3757                         return -EINVAL;
3758                 if (copy_from_user(&val, optval, sizeof(val)))
3759                         return -EFAULT;
3760                 if (val > INT_MAX)
3761                         return -EINVAL;
3762                 lock_sock(sk);
3763                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3764                         ret = -EBUSY;
3765                 } else {
3766                         po->tp_reserve = val;
3767                         ret = 0;
3768                 }
3769                 release_sock(sk);
3770                 return ret;
3771         }
3772         case PACKET_LOSS:
3773         {
3774                 unsigned int val;
3775 
3776                 if (optlen != sizeof(val))
3777                         return -EINVAL;
3778                 if (copy_from_user(&val, optval, sizeof(val)))
3779                         return -EFAULT;
3780 
3781                 lock_sock(sk);
3782                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3783                         ret = -EBUSY;
3784                 } else {
3785                         po->tp_loss = !!val;
3786                         ret = 0;
3787                 }
3788                 release_sock(sk);
3789                 return ret;
3790         }
3791         case PACKET_AUXDATA:
3792         {
3793                 int val;
3794 
3795                 if (optlen < sizeof(val))
3796                         return -EINVAL;
3797                 if (copy_from_user(&val, optval, sizeof(val)))
3798                         return -EFAULT;
3799 
3800                 lock_sock(sk);
3801                 po->auxdata = !!val;
3802                 release_sock(sk);
3803                 return 0;
3804         }
3805         case PACKET_ORIGDEV:
3806         {
3807                 int val;
3808 
3809                 if (optlen < sizeof(val))
3810                         return -EINVAL;
3811                 if (copy_from_user(&val, optval, sizeof(val)))
3812                         return -EFAULT;
3813 
3814                 lock_sock(sk);
3815                 po->origdev = !!val;
3816                 release_sock(sk);
3817                 return 0;
3818         }
3819         case PACKET_VNET_HDR:
3820         {
3821                 int val;
3822 
3823                 if (sock->type != SOCK_RAW)
3824                         return -EINVAL;
3825                 if (optlen < sizeof(val))
3826                         return -EINVAL;
3827                 if (copy_from_user(&val, optval, sizeof(val)))
3828                         return -EFAULT;
3829 
3830                 lock_sock(sk);
3831                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3832                         ret = -EBUSY;
3833                 } else {
3834                         po->has_vnet_hdr = !!val;
3835                         ret = 0;
3836                 }
3837                 release_sock(sk);
3838                 return ret;
3839         }
3840         case PACKET_TIMESTAMP:
3841         {
3842                 int val;
3843 
3844                 if (optlen != sizeof(val))
3845                         return -EINVAL;
3846                 if (copy_from_user(&val, optval, sizeof(val)))
3847                         return -EFAULT;
3848 
3849                 po->tp_tstamp = val;
3850                 return 0;
3851         }
3852         case PACKET_FANOUT:
3853         {
3854                 int val;
3855 
3856                 if (optlen != sizeof(val))
3857                         return -EINVAL;
3858                 if (copy_from_user(&val, optval, sizeof(val)))
3859                         return -EFAULT;
3860 
3861                 return fanout_add(sk, val & 0xffff, val >> 16);
3862         }
3863         case PACKET_FANOUT_DATA:
3864         {
3865                 if (!po->fanout)
3866                         return -EINVAL;
3867 
3868                 return fanout_set_data(po, optval, optlen);
3869         }
3870         case PACKET_IGNORE_OUTGOING:
3871         {
3872                 int val;
3873 
3874                 if (optlen != sizeof(val))
3875                         return -EINVAL;
3876                 if (copy_from_user(&val, optval, sizeof(val)))
3877                         return -EFAULT;
3878                 if (val < 0 || val > 1)
3879                         return -EINVAL;
3880 
3881                 po->prot_hook.ignore_outgoing = !!val;
3882                 return 0;
3883         }
3884         case PACKET_TX_HAS_OFF:
3885         {
3886                 unsigned int val;
3887 
3888                 if (optlen != sizeof(val))
3889                         return -EINVAL;
3890                 if (copy_from_user(&val, optval, sizeof(val)))
3891                         return -EFAULT;
3892 
3893                 lock_sock(sk);
3894                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3895                         ret = -EBUSY;
3896                 } else {
3897                         po->tp_tx_has_off = !!val;
3898                         ret = 0;
3899                 }
3900                 release_sock(sk);
3901                 return ret;
3902         }
3903         case PACKET_QDISC_BYPASS:
3904         {
3905                 int val;
3906 
3907                 if (optlen != sizeof(val))
3908                         return -EINVAL;
3909                 if (copy_from_user(&val, optval, sizeof(val)))
3910                         return -EFAULT;
3911 
3912                 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
3913                 return 0;
3914         }
3915         default:
3916                 return -ENOPROTOOPT;
3917         }
3918 }
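
/*
 * Illustrative userspace sketch (not part of this file; fd and the
 * geometry are example values, tp_retire_blk_tov is in milliseconds):
 * selecting TPACKET_V3 and attaching an RX ring. PACKET_VERSION has to
 * come first, both because packet_set_ring() sizes its request by
 * po->tp_version and because PACKET_VERSION returns -EBUSY once a
 * ring exists.
 *
 *	int ver = TPACKET_V3;
 *	struct tpacket_req3 req = {
 *		.tp_block_size     = 1 << 22,
 *		.tp_block_nr       = 64,
 *		.tp_frame_size     = 1 << 11,
 *		.tp_frame_nr       = ((1 << 22) / (1 << 11)) * 64,
 *		.tp_retire_blk_tov = 60,
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */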
3919 
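/*
 * Note that PACKET_STATISTICS reads are destructive: the counters are
 * copied and reset under the receive-queue lock, and the pending
 * tp_drops count is folded into tp_packets so dropped frames are
 * counted exactly once.
 */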
3920 static int packet_getsockopt(struct socket *sock, int level, int optname,
3921                              char __user *optval, int __user *optlen)
3922 {
3923         int len;
3924         int val, lv = sizeof(val);
3925         struct sock *sk = sock->sk;
3926         struct packet_sock *po = pkt_sk(sk);
3927         void *data = &val;
3928         union tpacket_stats_u st;
3929         struct tpacket_rollover_stats rstats;
3930         int drops;
3931 
3932         if (level != SOL_PACKET)
3933                 return -ENOPROTOOPT;
3934 
3935         if (get_user(len, optlen))
3936                 return -EFAULT;
3937 
3938         if (len < 0)
3939                 return -EINVAL;
3940 
3941         switch (optname) {
3942         case PACKET_STATISTICS:
3943                 spin_lock_bh(&sk->sk_receive_queue.lock);
3944                 memcpy(&st, &po->stats, sizeof(st));
3945                 memset(&po->stats, 0, sizeof(po->stats));
3946                 spin_unlock_bh(&sk->sk_receive_queue.lock);
3947                 drops = atomic_xchg(&po->tp_drops, 0);
3948 
3949                 if (po->tp_version == TPACKET_V3) {
3950                         lv = sizeof(struct tpacket_stats_v3);
3951                         st.stats3.tp_drops = drops;
3952                         st.stats3.tp_packets += drops;
3953                         data = &st.stats3;
3954                 } else {
3955                         lv = sizeof(struct tpacket_stats);
3956                         st.stats1.tp_drops = drops;
3957                         st.stats1.tp_packets += drops;
3958                         data = &st.stats1;
3959                 }
3960 
3961                 break;
3962         case PACKET_AUXDATA:
3963                 val = po->auxdata;
3964                 break;
3965         case PACKET_ORIGDEV:
3966                 val = po->origdev;
3967                 break;
3968         case PACKET_VNET_HDR:
3969                 val = po->has_vnet_hdr;
3970                 break;
3971         case PACKET_VERSION:
3972                 val = po->tp_version;
3973                 break;
3974         case PACKET_HDRLEN:
3975                 if (len > sizeof(int))
3976                         len = sizeof(int);
3977                 if (len < sizeof(int))
3978                         return -EINVAL;
3979                 if (copy_from_user(&val, optval, len))
3980                         return -EFAULT;
3981                 switch (val) {
3982                 case TPACKET_V1:
3983                         val = sizeof(struct tpacket_hdr);
3984                         break;
3985                 case TPACKET_V2:
3986                         val = sizeof(struct tpacket2_hdr);
3987                         break;
3988                 case TPACKET_V3:
3989                         val = sizeof(struct tpacket3_hdr);
3990                         break;
3991                 default:
3992                         return -EINVAL;
3993                 }
3994                 break;
3995         case PACKET_RESERVE:
3996                 val = po->tp_reserve;
3997                 break;
3998         case PACKET_LOSS:
3999                 val = po->tp_loss;
4000                 break;
4001         case PACKET_TIMESTAMP:
4002                 val = po->tp_tstamp;
4003                 break;
4004         case PACKET_FANOUT:
4005                 val = (po->fanout ?
4006                        ((u32)po->fanout->id |
4007                         ((u32)po->fanout->type << 16) |
4008                         ((u32)po->fanout->flags << 24)) :
4009                        0);
4010                 break;
4011         case PACKET_IGNORE_OUTGOING:
4012                 val = po->prot_hook.ignore_outgoing;
4013                 break;
4014         case PACKET_ROLLOVER_STATS:
4015                 if (!po->rollover)
4016                         return -EINVAL;
4017                 rstats.tp_all = atomic_long_read(&po->rollover->num);
4018                 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
4019                 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
4020                 data = &rstats;
4021                 lv = sizeof(rstats);
4022                 break;
4023         case PACKET_TX_HAS_OFF:
4024                 val = po->tp_tx_has_off;
4025                 break;
4026         case PACKET_QDISC_BYPASS:
4027                 val = packet_use_direct_xmit(po);
4028                 break;
4029         default:
4030                 return -ENOPROTOOPT;
4031         }
4032 
4033         if (len > lv)
4034                 len = lv;
4035         if (put_user(len, optlen))
4036                 return -EFAULT;
4037         if (copy_to_user(optval, data, len))
4038                 return -EFAULT;
4039         return 0;
4040 }
4041 
4042 
4043 #ifdef CONFIG_COMPAT
4044 static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
4045                                     char __user *optval, unsigned int optlen)
4046 {
4047         struct packet_sock *po = pkt_sk(sock->sk);
4048 
4049         if (level != SOL_PACKET)
4050                 return -ENOPROTOOPT;
4051 
4052         if (optname == PACKET_FANOUT_DATA &&
4053             po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
4054                 optval = (char __user *)get_compat_bpf_fprog(optval);
4055                 if (!optval)
4056                         return -EFAULT;
4057                 optlen = sizeof(struct sock_fprog);
4058         }
4059 
4060         return packet_setsockopt(sock, level, optname, optval, optlen);
4061 }
4062 #endif
4063 
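/*
 * netdev notifier: NETDEV_UNREGISTER drops this device's multicast
 * entries and falls through to NETDEV_DOWN, which unhooks a bound
 * socket and reports ENETDOWN; NETDEV_UP re-registers the hook if the
 * socket still has a protocol bound (po->num).
 */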
4064 static int packet_notifier(struct notifier_block *this,
4065                            unsigned long msg, void *ptr)
4066 {
4067         struct sock *sk;
4068         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4069         struct net *net = dev_net(dev);
4070 
4071         rcu_read_lock();
4072         sk_for_each_rcu(sk, &net->packet.sklist) {
4073                 struct packet_sock *po = pkt_sk(sk);
4074 
4075                 switch (msg) {
4076                 case NETDEV_UNREGISTER:
4077                         if (po->mclist)
4078                                 packet_dev_mclist_delete(dev, &po->mclist);
4079                         /* fall through */
4080 
4081                 case NETDEV_DOWN:
4082                         if (dev->ifindex == po->ifindex) {
4083                                 spin_lock(&po->bind_lock);
4084                                 if (po->running) {
4085                                         __unregister_prot_hook(sk, false);
4086                                         sk->sk_err = ENETDOWN;
4087                                         if (!sock_flag(sk, SOCK_DEAD))
4088                                                 sk->sk_error_report(sk);
4089                                 }
4090                                 if (msg == NETDEV_UNREGISTER) {
4091                                         packet_cached_dev_reset(po);
4092                                         po->ifindex = -1;
4093                                         if (po->prot_hook.dev)
4094                                                 dev_put(po->prot_hook.dev);
4095                                         po->prot_hook.dev = NULL;
4096                                 }
4097                                 spin_unlock(&po->bind_lock);
4098                         }
4099                         break;
4100                 case NETDEV_UP:
4101                         if (dev->ifindex == po->ifindex) {
4102                                 spin_lock(&po->bind_lock);
4103                                 if (po->num)
4104                                         register_prot_hook(sk);
4105                                 spin_unlock(&po->bind_lock);
4106                         }
4107                         break;
4108                 }
4109         }
4110         rcu_read_unlock();
4111         return NOTIFY_DONE;
4112 }
4113 
4114 
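/*
 * SIOCOUTQ reports the bytes currently queued for transmit; SIOCINQ
 * reports the size of only the next pending packet, letting a reader
 * size its buffer before the following recvmsg().
 */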
4115 static int packet_ioctl(struct socket *sock, unsigned int cmd,
4116                         unsigned long arg)
4117 {
4118         struct sock *sk = sock->sk;
4119 
4120         switch (cmd) {
4121         case SIOCOUTQ:
4122         {
4123                 int amount = sk_wmem_alloc_get(sk);
4124 
4125                 return put_user(amount, (int __user *)arg);
4126         }
4127         case SIOCINQ:
4128         {
4129                 struct sk_buff *skb;
4130                 int amount = 0;
4131 
4132                 spin_lock_bh(&sk->sk_receive_queue.lock);
4133                 skb = skb_peek(&sk->sk_receive_queue);
4134                 if (skb)
4135                         amount = skb->len;
4136                 spin_unlock_bh(&sk->sk_receive_queue.lock);
4137                 return put_user(amount, (int __user *)arg);
4138         }
4139 #ifdef CONFIG_INET
4140         case SIOCADDRT:
4141         case SIOCDELRT:
4142         case SIOCDARP:
4143         case SIOCGARP:
4144         case SIOCSARP:
4145         case SIOCGIFADDR:
4146         case SIOCSIFADDR:
4147         case SIOCGIFBRDADDR:
4148         case SIOCSIFBRDADDR:
4149         case SIOCGIFNETMASK:
4150         case SIOCSIFNETMASK:
4151         case SIOCGIFDSTADDR:
4152         case SIOCSIFDSTADDR:
4153         case SIOCSIFFLAGS:
4154                 return inet_dgram_ops.ioctl(sock, cmd, arg);
4155 #endif
4156 
4157         default:
4158                 return -ENOIOCTLCMD;
4159         }
4160         return 0;
4161 }
4162 
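/*
 * Ring-aware poll: on top of datagram_poll(), a mapped RX ring signals
 * EPOLLIN once the most recently filled frame is no longer
 * kernel-owned, and a TX ring signals EPOLLOUT while the current frame
 * is TP_STATUS_AVAILABLE. Polling also clears rollover pressure via
 * packet_rcv_try_clear_pressure().
 */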
4163 static __poll_t packet_poll(struct file *file, struct socket *sock,
4164                                 poll_table *wait)
4165 {
4166         struct sock *sk = sock->sk;
4167         struct packet_sock *po = pkt_sk(sk);
4168         __poll_t mask = datagram_poll(file, sock, wait);
4169 
4170         spin_lock_bh(&sk->sk_receive_queue.lock);
4171         if (po->rx_ring.pg_vec) {
4172                 if (!packet_previous_rx_frame(po, &po->rx_ring,
4173                         TP_STATUS_KERNEL))
4174                         mask |= EPOLLIN | EPOLLRDNORM;
4175         }
4176         packet_rcv_try_clear_pressure(po);
4177         spin_unlock_bh(&sk->sk_receive_queue.lock);
4178         spin_lock_bh(&sk->sk_write_queue.lock);
4179         if (po->tx_ring.pg_vec) {
4180                 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4181                         mask |= EPOLLOUT | EPOLLWRNORM;
4182         }
4183         spin_unlock_bh(&sk->sk_write_queue.lock);
4184         return mask;
4185 }
4186 
4187 
4188 /* Dirty? Well, I still have not found a better way to account
4189  * for user mmaps.
4190  */
4191 
4192 static void packet_mm_open(struct vm_area_struct *vma)
4193 {
4194         struct file *file = vma->vm_file;
4195         struct socket *sock = file->private_data;
4196         struct sock *sk = sock->sk;
4197 
4198         if (sk)
4199                 atomic_inc(&pkt_sk(sk)->mapped);
4200 }
4201 
4202 static void packet_mm_close(struct vm_area_struct *vma)
4203 {
4204         struct file *file = vma->vm_file;
4205         struct socket *sock = file->private_data;
4206         struct sock *sk = sock->sk;
4207 
4208         if (sk)
4209                 atomic_dec(&pkt_sk(sk)->mapped);
4210 }
4211 
4212 static const struct vm_operations_struct packet_mmap_ops = {
4213         .open   =       packet_mm_open,
4214         .close  =       packet_mm_close,
4215 };
4216 
4217 static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
4218                         unsigned int len)
4219 {
4220         int i;
4221 
4222         for (i = 0; i < len; i++) {
4223                 if (likely(pg_vec[i].buffer)) {
4224                         if (is_vmalloc_addr(pg_vec[i].buffer))
4225                                 vfree(pg_vec[i].buffer);
4226                         else
4227                                 free_pages((unsigned long)pg_vec[i].buffer,
4228                                            order);
4229                         pg_vec[i].buffer = NULL;
4230                 }
4231         }
4232         kfree(pg_vec);
4233 }
4234 
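/*
 * Ring blocks are allocated in three steps: physically contiguous
 * pages first (cheapest for pgv_to_page()), then vzalloc(), and
 * finally the page allocator again without __GFP_NORETRY as a last
 * resort.
 */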
4235 static char *alloc_one_pg_vec_page(unsigned long order)
4236 {
4237         char *buffer;
4238         gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
4239                           __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
4240 
4241         buffer = (char *) __get_free_pages(gfp_flags, order);
4242         if (buffer)
4243                 return buffer;
4244 
4245         /* __get_free_pages() failed; fall back to vmalloc */
4246         buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
4247         if (buffer)
4248                 return buffer;
4249 
4250         /* vmalloc() also failed; retry the page allocator, now allowing reclaim retries */
4251         gfp_flags &= ~__GFP_NORETRY;
4252         buffer = (char *) __get_free_pages(gfp_flags, order);
4253         if (buffer)
4254                 return buffer;
4255 
4256         /* complete and utter failure */
4257         return NULL;
4258 }
4259 
4260 static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4261 {
4262         unsigned int block_nr = req->tp_block_nr;
4263         struct pgv *pg_vec;
4264         int i;
4265 
4266         pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
4267         if (unlikely(!pg_vec))
4268                 goto out;
4269 
4270         for (i = 0; i < block_nr; i++) {
4271                 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
4272                 if (unlikely(!pg_vec[i].buffer))
4273                         goto out_free_pgvec;
4274         }
4275 
4276 out:
4277         return pg_vec;
4278 
4279 out_free_pgvec:
4280         free_pg_vec(pg_vec, order, block_nr);
4281         pg_vec = NULL;
4282         goto out;
4283 }
4284 
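/*
 * Attach (req->tp_block_nr != 0) or detach a ring. The protocol hook
 * is unregistered and synchronize_net() awaited before the pg_vec swap
 * so no receive path can still touch the old ring, and the swap is
 * skipped while the ring is mmapped unless the socket is closing.
 */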
4285 static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4286                 int closing, int tx_ring)
4287 {
4288         struct pgv *pg_vec = NULL;
4289         struct packet_sock *po = pkt_sk(sk);
4290         unsigned long *rx_owner_map = NULL;
4291         int was_running, order = 0;
4292         struct packet_ring_buffer *rb;
4293         struct sk_buff_head *rb_queue;
4294         __be16 num;
4295         int err = -EINVAL;
4296         /* V1/V2 and V3 requests share their leading fields, so req aliases req_u */
4297         struct tpacket_req *req = &req_u->req;
4298 
4299         rb = tx_ring ? &po->tx_ring : &po->rx_ring;
4300         rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
4301 
4302         err = -EBUSY;
4303         if (!closing) {
4304                 if (atomic_read(&po->mapped))
4305                         goto out;
4306                 if (packet_read_pending(rb))
4307                         goto out;
4308         }
4309 
4310         if (req->tp_block_nr) {
4311                 unsigned int min_frame_size;
4312 
4313                 /* Sanity tests and some calculations */
4314                 err = -EBUSY;
4315                 if (unlikely(rb->pg_vec))
4316                         goto out;
4317 
4318                 switch (po->tp_version) {
4319                 case TPACKET_V1:
4320                         po->tp_hdrlen = TPACKET_HDRLEN;
4321                         break;
4322                 case TPACKET_V2:
4323                         po->tp_hdrlen = TPACKET2_HDRLEN;
4324                         break;
4325                 case TPACKET_V3:
4326                         po->tp_hdrlen = TPACKET3_HDRLEN;
4327                         break;
4328                 }
4329 
4330                 err = -EINVAL;
4331                 if (unlikely((int)req->tp_block_size <= 0))
4332                         goto out;
4333                 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4334                         goto out;
4335                 min_frame_size = po->tp_hdrlen + po->tp_reserve;
4336                 if (po->tp_version >= TPACKET_V3 &&
4337                     req->tp_block_size <
4338                     BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
4339                         goto out;
4340                 if (unlikely(req->tp_frame_size < min_frame_size))
4341                         goto out;
4342                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4343                         goto out;
4344 
4345                 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4346                 if (unlikely(rb->frames_per_block == 0))
4347                         goto out;
4348                 if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
4349                         goto out;
4350                 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4351                                         req->tp_frame_nr))
4352                         goto out;
4353 
4354                 err = -ENOMEM;
4355                 order = get_order(req->tp_block_size);
4356                 pg_vec = alloc_pg_vec(req, order);
4357                 if (unlikely(!pg_vec))
4358                         goto out;
4359                 switch (po->tp_version) {
4360                 case TPACKET_V3:
4361                         /* Block transmit is not supported yet */
4362                         if (!tx_ring) {
4363                                 init_prb_bdqc(po, rb, pg_vec, req_u);
4364                         } else {
4365                                 struct tpacket_req3 *req3 = &req_u->req3;
4366 
4367                                 if (req3->tp_retire_blk_tov ||
4368                                     req3->tp_sizeof_priv ||
4369                                     req3->tp_feature_req_word) {
4370                                         err = -EINVAL;
4371                                         goto out_free_pg_vec;
4372                                 }
4373                         }
4374                         break;
4375                 default:
4376                         if (!tx_ring) {
4377                                 rx_owner_map = bitmap_alloc(req->tp_frame_nr,
4378                                         GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
4379                                 if (!rx_owner_map)
4380                                         goto out_free_pg_vec;
4381                         }
4382                         break;
4383                 }
4384         }
4385         /* req->tp_block_nr == 0: tearing the ring down */
4386         else {
4387                 err = -EINVAL;
4388                 if (unlikely(req->tp_frame_nr))
4389                         goto out;
4390         }
4391 
4392 
4393         /* Detach socket from network */
4394         spin_lock(&po->bind_lock);
4395         was_running = po->running;
4396         num = po->num;
4397         if (was_running) {
4398                 po->num = 0;
4399                 __unregister_prot_hook(sk, false);
4400         }
4401         spin_unlock(&po->bind_lock);
4402 
4403         synchronize_net();
4404 
4405         err = -EBUSY;
4406         mutex_lock(&po->pg_vec_lock);
4407         if (closing || atomic_read(&po->mapped) == 0) {
4408                 err = 0;
4409                 spin_lock_bh(&rb_queue->lock);
4410                 swap(rb->pg_vec, pg_vec);
4411                 if (po->tp_version <= TPACKET_V2)
4412                         swap(rb->rx_owner_map, rx_owner_map);
4413                 rb->frame_max = (req->tp_frame_nr - 1);
4414                 rb->head = 0;
4415                 rb->frame_size = req->tp_frame_size;
4416                 spin_unlock_bh(&rb_queue->lock);
4417 
4418                 swap(rb->pg_vec_order, order);
4419                 swap(rb->pg_vec_len, req->tp_block_nr);
4420 
4421                 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
4422                 po->prot_hook.func = (po->rx_ring.pg_vec) ?
4423                                                 tpacket_rcv : packet_rcv;
4424                 skb_queue_purge(rb_queue);
4425                 if (atomic_read(&po->mapped))
4426                         pr_err("packet_mmap: vma is busy: %d\n",
4427                                atomic_read(&po->mapped));
4428         }
4429         mutex_unlock(&po->pg_vec_lock);
4430 
4431         spin_lock(&po->bind_lock);
4432         if (was_running) {
4433                 po->num = num;
4434                 register_prot_hook(sk);
4435         }
4436         spin_unlock(&po->bind_lock);
4437         if (pg_vec && (po->tp_version > TPACKET_V2)) {
4438                 /* Because we don't support block-based V3 on tx-ring */
4439                 if (!tx_ring)
4440                         prb_shutdown_retire_blk_timer(po, rb_queue);
4441         }
4442 
4443 out_free_pg_vec:
4444         bitmap_free(rx_owner_map);
4445         if (pg_vec)
4446                 free_pg_vec(pg_vec, order, req->tp_block_nr);
4447 out:
4448         return err;
4449 }
4450 
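/*
 * packet_mmap() below inserts every ring page into the vma in order,
 * RX ring first and then TX ring, and insists on vm_pgoff == 0 with a
 * length exactly matching the sum of both rings. Illustrative
 * userspace sketch (fd and req are assumptions carried over from the
 * PACKET_RX_RING example above):
 *
 *	size_t len = (size_t)req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */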
4451 static int packet_mmap(struct file *file, struct socket *sock,
4452                 struct vm_area_struct *vma)
4453 {
4454         struct sock *sk = sock->sk;
4455         struct packet_sock *po = pkt_sk(sk);
4456         unsigned long size, expected_size;
4457         struct packet_ring_buffer *rb;
4458         unsigned long start;
4459         int err = -EINVAL;
4460         int i;
4461 
4462         if (vma->vm_pgoff)
4463                 return -EINVAL;
4464 
4465         mutex_lock(&po->pg_vec_lock);
4466 
4467         expected_size = 0;
4468         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4469                 if (rb->pg_vec) {
4470                         expected_size += rb->pg_vec_len
4471                                                 * rb->pg_vec_pages
4472                                                 * PAGE_SIZE;
4473                 }
4474         }
4475 
4476         if (expected_size == 0)
4477                 goto out;
4478 
4479         size = vma->vm_end - vma->vm_start;
4480         if (size != expected_size)
4481                 goto out;
4482 
4483         start = vma->vm_start;
4484         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4485                 if (rb->pg_vec == NULL)
4486                         continue;
4487 
4488                 for (i = 0; i < rb->pg_vec_len; i++) {
4489                         struct page *page;
4490                         void *kaddr = rb->pg_vec[i].buffer;
4491                         int pg_num;
4492 
4493                         for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
4494                                 page = pgv_to_page(kaddr);
4495                                 err = vm_insert_page(vma, start, page);
4496                                 if (unlikely(err))
4497                                         goto out;
4498                                 start += PAGE_SIZE;
4499                                 kaddr += PAGE_SIZE;
4500                         }
4501                 }
4502         }
4503 
4504         atomic_inc(&po->mapped);
4505         vma->vm_ops = &packet_mmap_ops;
4506         err = 0;
4507 
4508 out:
4509         mutex_unlock(&po->pg_vec_lock);
4510         return err;
4511 }
4512 
4513 static const struct proto_ops packet_ops_spkt = {
4514         .family =       PF_PACKET,
4515         .owner =        THIS_MODULE,
4516         .release =      packet_release,
4517         .bind =         packet_bind_spkt,
4518         .connect =      sock_no_connect,
4519         .socketpair =   sock_no_socketpair,
4520         .accept =       sock_no_accept,
4521         .getname =      packet_getname_spkt,
4522         .poll =         datagram_poll,
4523         .ioctl =        packet_ioctl,
4524         .gettstamp =    sock_gettstamp,
4525         .listen =       sock_no_listen,
4526         .shutdown =     sock_no_shutdown,
4527         .setsockopt =   sock_no_setsockopt,
4528         .getsockopt =   sock_no_getsockopt,
4529         .sendmsg =      packet_sendmsg_spkt,
4530         .recvmsg =      packet_recvmsg,
4531         .mmap =         sock_no_mmap,
4532         .sendpage =     sock_no_sendpage,
4533 };
4534 
4535 static const struct proto_ops packet_ops = {
4536         .family =       PF_PACKET,
4537         .owner =        THIS_MODULE,
4538         .release =      packet_release,
4539         .bind =         packet_bind,
4540         .connect =      sock_no_connect,
4541         .socketpair =   sock_no_socketpair,
4542         .accept =       sock_no_accept,
4543         .getname =      packet_getname,
4544         .poll =         packet_poll,
4545         .ioctl =        packet_ioctl,
4546         .gettstamp =    sock_gettstamp,
4547         .listen =       sock_no_listen,
4548         .shutdown =     sock_no_shutdown,
4549         .setsockopt =   packet_setsockopt,
4550         .getsockopt =   packet_getsockopt,
4551 #ifdef CONFIG_COMPAT
4552         .compat_setsockopt = compat_packet_setsockopt,
4553 #endif
4554         .sendmsg =      packet_sendmsg,
4555         .recvmsg =      packet_recvmsg,
4556         .mmap =         packet_mmap,
4557         .sendpage =     sock_no_sendpage,
4558 };
4559 
4560 static const struct net_proto_family packet_family_ops = {
4561         .family =       PF_PACKET,
4562         .create =       packet_create,
4563         .owner  =       THIS_MODULE,
4564 };
4565 
4566 static struct notifier_block packet_netdev_notifier = {
4567         .notifier_call =        packet_notifier,
4568 };
4569 
4570 #ifdef CONFIG_PROC_FS
4571 
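/*
 * /proc/net/packet: one line per socket, in the column order printed
 * for SEQ_START_TOKEN by packet_seq_show() below.
 */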
4572 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
4573         __acquires(RCU)
4574 {
4575         struct net *net = seq_file_net(seq);
4576 
4577         rcu_read_lock();
4578         return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
4579 }
4580 
4581 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4582 {
4583         struct net *net = seq_file_net(seq);
4584         return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4585 }
4586 
4587 static void packet_seq_stop(struct seq_file *seq, void *v)
4588         __releases(RCU)
4589 {
4590         rcu_read_unlock();
4591 }
4592 
4593 static int packet_seq_show(struct seq_file *seq, void *v)
4594 {
4595         if (v == SEQ_START_TOKEN)
4596                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
4597         else {
4598                 struct sock *s = sk_entry(v);
4599                 const struct packet_sock *po = pkt_sk(s);
4600 
4601                 seq_printf(seq,
4602                            "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
4603                            s,
4604                            refcount_read(&s->sk_refcnt),
4605                            s->sk_type,
4606                            ntohs(po->num),
4607                            po->ifindex,
4608                            po->running,
4609                            atomic_read(&s->sk_rmem_alloc),
4610                            from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4611                            sock_i_ino(s));
4612         }
4613 
4614         return 0;
4615 }
4616 
4617 static const struct seq_operations packet_seq_ops = {
4618         .start  = packet_seq_start,
4619         .next   = packet_seq_next,
4620         .stop   = packet_seq_stop,
4621         .show   = packet_seq_show,
4622 };
4623 #endif
4624 
4625 static int __net_init packet_net_init(struct net *net)
4626 {
4627         mutex_init(&net->packet.sklist_lock);
4628         INIT_HLIST_HEAD(&net->packet.sklist);
4629 
4630         if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
4631                         sizeof(struct seq_net_private)))
4632                 return -ENOMEM;
4633 
4634         return 0;
4635 }
4636 
4637 static void __net_exit packet_net_exit(struct net *net)
4638 {
4639         remove_proc_entry("packet", net->proc_net);
4640         WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4641 }
4642 
4643 static struct pernet_operations packet_net_ops = {
4644         .init = packet_net_init,
4645         .exit = packet_net_exit,
4646 };
4647 
4648 
4649 static void __exit packet_exit(void)
4650 {
4651         unregister_netdevice_notifier(&packet_netdev_notifier);
4652         unregister_pernet_subsys(&packet_net_ops);
4653         sock_unregister(PF_PACKET);
4654         proto_unregister(&packet_proto);
4655 }
4656 
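/*
 * packet_init() registers the proto, the socket family, the pernet
 * subsystem and the netdev notifier in that order; the error path
 * unwinds in exact reverse.
 */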
4657 static int __init packet_init(void)
4658 {
4659         int rc;
4660 
4661         rc = proto_register(&packet_proto, 0);
4662         if (rc)
4663                 goto out;
4664         rc = sock_register(&packet_family_ops);
4665         if (rc)
4666                 goto out_proto;
4667         rc = register_pernet_subsys(&packet_net_ops);
4668         if (rc)
4669                 goto out_sock;
4670         rc = register_netdevice_notifier(&packet_netdev_notifier);
4671         if (rc)
4672                 goto out_pernet;
4673 
4674         return 0;
4675 
4676 out_pernet:
4677         unregister_pernet_subsys(&packet_net_ops);
4678 out_sock:
4679         sock_unregister(PF_PACKET);
4680 out_proto:
4681         proto_unregister(&packet_proto);
4682 out:
4683         return rc;
4684 }
4685 
4686 module_init(packet_init);
4687 module_exit(packet_exit);
4688 MODULE_LICENSE("GPL");
4689 MODULE_ALIAS_NETPROTO(PF_PACKET);