1/* 2 * NET4: Implementation of BSD Unix domain sockets. 3 * 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 * Fixes: 12 * Linus Torvalds : Assorted bug cures. 13 * Niibe Yutaka : async I/O support. 14 * Carsten Paeth : PF_UNIX check, address fixes. 15 * Alan Cox : Limit size of allocated blocks. 16 * Alan Cox : Fixed the stupid socketpair bug. 17 * Alan Cox : BSD compatibility fine tuning. 18 * Alan Cox : Fixed a bug in connect when interrupted. 19 * Alan Cox : Sorted out a proper draft version of 20 * file descriptor passing hacked up from 21 * Mike Shaver's work. 22 * Marty Leisner : Fixes to fd passing 23 * Nick Nevin : recvmsg bugfix. 24 * Alan Cox : Started proper garbage collector 25 * Heiko EiBfeldt : Missing verify_area check 26 * Alan Cox : Started POSIXisms 27 * Andreas Schwab : Replace inode by dentry for proper 28 * reference counting 29 * Kirk Petersen : Made this a module 30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 31 * Lots of bug fixes. 32 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 33 * by above two patches. 34 * Andrea Arcangeli : If possible we block in connect(2) 35 * if the max backlog of the listen socket 36 * is been reached. This won't break 37 * old apps and it will avoid huge amount 38 * of socks hashed (this for unix_gc() 39 * performances reasons). 40 * Security fix that limits the max 41 * number of socks to 2*max_files and 42 * the number of skb queueable in the 43 * dgram receiver. 44 * Artur Skawina : Hash function optimizations 45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) 46 * Malcolm Beattie : Set peercred for socketpair 47 * Michal Ostrowski : Module initialization cleanup. 48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 49 * the core infrastructure is doing that 50 * for all net proto families now (2.5.69+) 51 * 52 * 53 * Known differences from reference BSD that was tested: 54 * 55 * [TO FIX] 56 * ECONNREFUSED is not returned from one end of a connected() socket to the 57 * other the moment one end closes. 58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 59 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 60 * [NOT TO FIX] 61 * accept() returns a path name even if the connecting socket has closed 62 * in the meantime (BSD loses the path and gives up). 63 * accept() returns 0 length path for an unbound connector. BSD returns 16 64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 65 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 66 * BSD af_unix apparently has connect forgetting to block properly. 67 * (need to check this with the POSIX spec in detail) 68 * 69 * Differences from 2.0.0-11-... (ANK) 70 * Bug fixes and improvements. 71 * - client shutdown killed server socket. 72 * - removed all useless cli/sti pairs. 73 * 74 * Semantic changes/extensions. 75 * - generic control message passing. 76 * - SCM_CREDENTIALS control message. 77 * - "Abstract" (not FS based) socket bindings. 78 * Abstract names are sequences of bytes (not zero terminated) 79 * started by 0, so that this name space does not intersect 80 * with BSD names. 81 */ 82 83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 84 85#include <linux/module.h> 86#include <linux/kernel.h> 87#include <linux/signal.h> 88#include <linux/sched.h> 89#include <linux/errno.h> 90#include <linux/string.h> 91#include <linux/stat.h> 92#include <linux/dcache.h> 93#include <linux/namei.h> 94#include <linux/socket.h> 95#include <linux/un.h> 96#include <linux/fcntl.h> 97#include <linux/termios.h> 98#include <linux/sockios.h> 99#include <linux/net.h> 100#include <linux/in.h> 101#include <linux/fs.h> 102#include <linux/slab.h> 103#include <asm/uaccess.h> 104#include <linux/skbuff.h> 105#include <linux/netdevice.h> 106#include <net/net_namespace.h> 107#include <net/sock.h> 108#include <net/tcp_states.h> 109#include <net/af_unix.h> 110#include <linux/proc_fs.h> 111#include <linux/seq_file.h> 112#include <net/scm.h> 113#include <linux/init.h> 114#include <linux/poll.h> 115#include <linux/rtnetlink.h> 116#include <linux/mount.h> 117#include <net/checksum.h> 118#include <linux/security.h> 119#include <linux/freezer.h> 120 121struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; 122EXPORT_SYMBOL_GPL(unix_socket_table); 123DEFINE_SPINLOCK(unix_table_lock); 124EXPORT_SYMBOL_GPL(unix_table_lock); 125static atomic_long_t unix_nr_socks; 126 127 128static struct hlist_head *unix_sockets_unbound(void *addr) 129{ 130 unsigned long hash = (unsigned long)addr; 131 132 hash ^= hash >> 16; 133 hash ^= hash >> 8; 134 hash %= UNIX_HASH_SIZE; 135 return &unix_socket_table[UNIX_HASH_SIZE + hash]; 136} 137 138#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) 139 140#ifdef CONFIG_SECURITY_NETWORK 141static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 142{ 143 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); 144} 145 146static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 147{ 148 scm->secid = *UNIXSID(skb); 149} 150#else 151static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 152{ } 153 154static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 155{ } 156#endif /* CONFIG_SECURITY_NETWORK */ 157 158/* 159 * SMP locking strategy: 160 * hash table is protected with spinlock unix_table_lock 161 * each socket state is protected by separate spin lock. 162 */ 163 164static inline unsigned int unix_hash_fold(__wsum n) 165{ 166 unsigned int hash = (__force unsigned int)csum_fold(n); 167 168 hash ^= hash>>8; 169 return hash&(UNIX_HASH_SIZE-1); 170} 171 172#define unix_peer(sk) (unix_sk(sk)->peer) 173 174static inline int unix_our_peer(struct sock *sk, struct sock *osk) 175{ 176 return unix_peer(osk) == sk; 177} 178 179static inline int unix_may_send(struct sock *sk, struct sock *osk) 180{ 181 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 182} 183 184static inline int unix_recvq_full(struct sock const *sk) 185{ 186 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 187} 188 189struct sock *unix_peer_get(struct sock *s) 190{ 191 struct sock *peer; 192 193 unix_state_lock(s); 194 peer = unix_peer(s); 195 if (peer) 196 sock_hold(peer); 197 unix_state_unlock(s); 198 return peer; 199} 200EXPORT_SYMBOL_GPL(unix_peer_get); 201 202static inline void unix_release_addr(struct unix_address *addr) 203{ 204 if (atomic_dec_and_test(&addr->refcnt)) 205 kfree(addr); 206} 207 208/* 209 * Check unix socket name: 210 * - should be not zero length. 211 * - if started by not zero, should be NULL terminated (FS object) 212 * - if started by zero, it is abstract name. 213 */ 214 215static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) 216{ 217 if (len <= sizeof(short) || len > sizeof(*sunaddr)) 218 return -EINVAL; 219 if (!sunaddr || sunaddr->sun_family != AF_UNIX) 220 return -EINVAL; 221 if (sunaddr->sun_path[0]) { 222 /* 223 * This may look like an off by one error but it is a bit more 224 * subtle. 108 is the longest valid AF_UNIX path for a binding. 225 * sun_path[108] doesn't as such exist. However in kernel space 226 * we are guaranteed that it is a valid memory location in our 227 * kernel address buffer. 228 */ 229 ((char *)sunaddr)[len] = 0; 230 len = strlen(sunaddr->sun_path)+1+sizeof(short); 231 return len; 232 } 233 234 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); 235 return len; 236} 237 238static void __unix_remove_socket(struct sock *sk) 239{ 240 sk_del_node_init(sk); 241} 242 243static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) 244{ 245 WARN_ON(!sk_unhashed(sk)); 246 sk_add_node(sk, list); 247} 248 249static inline void unix_remove_socket(struct sock *sk) 250{ 251 spin_lock(&unix_table_lock); 252 __unix_remove_socket(sk); 253 spin_unlock(&unix_table_lock); 254} 255 256static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) 257{ 258 spin_lock(&unix_table_lock); 259 __unix_insert_socket(list, sk); 260 spin_unlock(&unix_table_lock); 261} 262 263static struct sock *__unix_find_socket_byname(struct net *net, 264 struct sockaddr_un *sunname, 265 int len, int type, unsigned int hash) 266{ 267 struct sock *s; 268 269 sk_for_each(s, &unix_socket_table[hash ^ type]) { 270 struct unix_sock *u = unix_sk(s); 271 272 if (!net_eq(sock_net(s), net)) 273 continue; 274 275 if (u->addr->len == len && 276 !memcmp(u->addr->name, sunname, len)) 277 goto found; 278 } 279 s = NULL; 280found: 281 return s; 282} 283 284static inline struct sock *unix_find_socket_byname(struct net *net, 285 struct sockaddr_un *sunname, 286 int len, int type, 287 unsigned int hash) 288{ 289 struct sock *s; 290 291 spin_lock(&unix_table_lock); 292 s = __unix_find_socket_byname(net, sunname, len, type, hash); 293 if (s) 294 sock_hold(s); 295 spin_unlock(&unix_table_lock); 296 return s; 297} 298 299static struct sock *unix_find_socket_byinode(struct inode *i) 300{ 301 struct sock *s; 302 303 spin_lock(&unix_table_lock); 304 sk_for_each(s, 305 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 306 struct dentry *dentry = unix_sk(s)->path.dentry; 307 308 if (dentry && d_backing_inode(dentry) == i) { 309 sock_hold(s); 310 goto found; 311 } 312 } 313 s = NULL; 314found: 315 spin_unlock(&unix_table_lock); 316 return s; 317} 318 319/* Support code for asymmetrically connected dgram sockets 320 * 321 * If a datagram socket is connected to a socket not itself connected 322 * to the first socket (eg, /dev/log), clients may only enqueue more 323 * messages if the present receive queue of the server socket is not 324 * "too large". This means there's a second writeability condition 325 * poll and sendmsg need to test. The dgram recv code will do a wake 326 * up on the peer_wait wait queue of a socket upon reception of a 327 * datagram which needs to be propagated to sleeping would-be writers 328 * since these might not have sent anything so far. This can't be 329 * accomplished via poll_wait because the lifetime of the server 330 * socket might be less than that of its clients if these break their 331 * association with it or if the server socket is closed while clients 332 * are still connected to it and there's no way to inform "a polling 333 * implementation" that it should let go of a certain wait queue 334 * 335 * In order to propagate a wake up, a wait_queue_t of the client 336 * socket is enqueued on the peer_wait queue of the server socket 337 * whose wake function does a wake_up on the ordinary client socket 338 * wait queue. This connection is established whenever a write (or 339 * poll for write) hit the flow control condition and broken when the 340 * association to the server socket is dissolved or after a wake up 341 * was relayed. 342 */ 343 344static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, 345 void *key) 346{ 347 struct unix_sock *u; 348 wait_queue_head_t *u_sleep; 349 350 u = container_of(q, struct unix_sock, peer_wake); 351 352 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 353 q); 354 u->peer_wake.private = NULL; 355 356 /* relaying can only happen while the wq still exists */ 357 u_sleep = sk_sleep(&u->sk); 358 if (u_sleep) 359 wake_up_interruptible_poll(u_sleep, key); 360 361 return 0; 362} 363 364static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 365{ 366 struct unix_sock *u, *u_other; 367 int rc; 368 369 u = unix_sk(sk); 370 u_other = unix_sk(other); 371 rc = 0; 372 spin_lock(&u_other->peer_wait.lock); 373 374 if (!u->peer_wake.private) { 375 u->peer_wake.private = other; 376 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 377 378 rc = 1; 379 } 380 381 spin_unlock(&u_other->peer_wait.lock); 382 return rc; 383} 384 385static void unix_dgram_peer_wake_disconnect(struct sock *sk, 386 struct sock *other) 387{ 388 struct unix_sock *u, *u_other; 389 390 u = unix_sk(sk); 391 u_other = unix_sk(other); 392 spin_lock(&u_other->peer_wait.lock); 393 394 if (u->peer_wake.private == other) { 395 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 396 u->peer_wake.private = NULL; 397 } 398 399 spin_unlock(&u_other->peer_wait.lock); 400} 401 402static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 403 struct sock *other) 404{ 405 unix_dgram_peer_wake_disconnect(sk, other); 406 wake_up_interruptible_poll(sk_sleep(sk), 407 POLLOUT | 408 POLLWRNORM | 409 POLLWRBAND); 410} 411 412/* preconditions: 413 * - unix_peer(sk) == other 414 * - association is stable 415 */ 416static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 417{ 418 int connected; 419 420 connected = unix_dgram_peer_wake_connect(sk, other); 421 422 if (unix_recvq_full(other)) 423 return 1; 424 425 if (connected) 426 unix_dgram_peer_wake_disconnect(sk, other); 427 428 return 0; 429} 430 431static inline int unix_writable(struct sock *sk) 432{ 433 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 434} 435 436static void unix_write_space(struct sock *sk) 437{ 438 struct socket_wq *wq; 439 440 rcu_read_lock(); 441 if (unix_writable(sk)) { 442 wq = rcu_dereference(sk->sk_wq); 443 if (wq_has_sleeper(wq)) 444 wake_up_interruptible_sync_poll(&wq->wait, 445 POLLOUT | POLLWRNORM | POLLWRBAND); 446 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 447 } 448 rcu_read_unlock(); 449} 450 451/* When dgram socket disconnects (or changes its peer), we clear its receive 452 * queue of packets arrived from previous peer. First, it allows to do 453 * flow control based only on wmem_alloc; second, sk connected to peer 454 * may receive messages only from that peer. */ 455static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 456{ 457 if (!skb_queue_empty(&sk->sk_receive_queue)) { 458 skb_queue_purge(&sk->sk_receive_queue); 459 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 460 461 /* If one link of bidirectional dgram pipe is disconnected, 462 * we signal error. Messages are lost. Do not make this, 463 * when peer was not connected to us. 464 */ 465 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 466 other->sk_err = ECONNRESET; 467 other->sk_error_report(other); 468 } 469 } 470} 471 472static void unix_sock_destructor(struct sock *sk) 473{ 474 struct unix_sock *u = unix_sk(sk); 475 476 skb_queue_purge(&sk->sk_receive_queue); 477 478 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 479 WARN_ON(!sk_unhashed(sk)); 480 WARN_ON(sk->sk_socket); 481 if (!sock_flag(sk, SOCK_DEAD)) { 482 pr_info("Attempt to release alive unix socket: %p\n", sk); 483 return; 484 } 485 486 if (u->addr) 487 unix_release_addr(u->addr); 488 489 atomic_long_dec(&unix_nr_socks); 490 local_bh_disable(); 491 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 492 local_bh_enable(); 493#ifdef UNIX_REFCNT_DEBUG 494 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 495 atomic_long_read(&unix_nr_socks)); 496#endif 497} 498 499static void unix_release_sock(struct sock *sk, int embrion) 500{ 501 struct unix_sock *u = unix_sk(sk); 502 struct path path; 503 struct sock *skpair; 504 struct sk_buff *skb; 505 int state; 506 507 unix_remove_socket(sk); 508 509 /* Clear state */ 510 unix_state_lock(sk); 511 sock_orphan(sk); 512 sk->sk_shutdown = SHUTDOWN_MASK; 513 path = u->path; 514 u->path.dentry = NULL; 515 u->path.mnt = NULL; 516 state = sk->sk_state; 517 sk->sk_state = TCP_CLOSE; 518 unix_state_unlock(sk); 519 520 wake_up_interruptible_all(&u->peer_wait); 521 522 skpair = unix_peer(sk); 523 524 if (skpair != NULL) { 525 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 526 unix_state_lock(skpair); 527 /* No more writes */ 528 skpair->sk_shutdown = SHUTDOWN_MASK; 529 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 530 skpair->sk_err = ECONNRESET; 531 unix_state_unlock(skpair); 532 skpair->sk_state_change(skpair); 533 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 534 } 535 536 unix_dgram_peer_wake_disconnect(sk, skpair); 537 sock_put(skpair); /* It may now die */ 538 unix_peer(sk) = NULL; 539 } 540 541 /* Try to flush out this socket. Throw out buffers at least */ 542 543 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 544 if (state == TCP_LISTEN) 545 unix_release_sock(skb->sk, 1); 546 /* passed fds are erased in the kfree_skb hook */ 547 kfree_skb(skb); 548 } 549 550 if (path.dentry) 551 path_put(&path); 552 553 sock_put(sk); 554 555 /* ---- Socket is dead now and most probably destroyed ---- */ 556 557 /* 558 * Fixme: BSD difference: In BSD all sockets connected to us get 559 * ECONNRESET and we die on the spot. In Linux we behave 560 * like files and pipes do and wait for the last 561 * dereference. 562 * 563 * Can't we simply set sock->err? 564 * 565 * What the above comment does talk about? --ANK(980817) 566 */ 567 568 if (unix_tot_inflight) 569 unix_gc(); /* Garbage collect fds */ 570} 571 572static void init_peercred(struct sock *sk) 573{ 574 put_pid(sk->sk_peer_pid); 575 if (sk->sk_peer_cred) 576 put_cred(sk->sk_peer_cred); 577 sk->sk_peer_pid = get_pid(task_tgid(current)); 578 sk->sk_peer_cred = get_current_cred(); 579} 580 581static void copy_peercred(struct sock *sk, struct sock *peersk) 582{ 583 put_pid(sk->sk_peer_pid); 584 if (sk->sk_peer_cred) 585 put_cred(sk->sk_peer_cred); 586 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 587 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 588} 589 590static int unix_listen(struct socket *sock, int backlog) 591{ 592 int err; 593 struct sock *sk = sock->sk; 594 struct unix_sock *u = unix_sk(sk); 595 struct pid *old_pid = NULL; 596 597 err = -EOPNOTSUPP; 598 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 599 goto out; /* Only stream/seqpacket sockets accept */ 600 err = -EINVAL; 601 if (!u->addr) 602 goto out; /* No listens on an unbound socket */ 603 unix_state_lock(sk); 604 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 605 goto out_unlock; 606 if (backlog > sk->sk_max_ack_backlog) 607 wake_up_interruptible_all(&u->peer_wait); 608 sk->sk_max_ack_backlog = backlog; 609 sk->sk_state = TCP_LISTEN; 610 /* set credentials so connect can copy them */ 611 init_peercred(sk); 612 err = 0; 613 614out_unlock: 615 unix_state_unlock(sk); 616 put_pid(old_pid); 617out: 618 return err; 619} 620 621static int unix_release(struct socket *); 622static int unix_bind(struct socket *, struct sockaddr *, int); 623static int unix_stream_connect(struct socket *, struct sockaddr *, 624 int addr_len, int flags); 625static int unix_socketpair(struct socket *, struct socket *); 626static int unix_accept(struct socket *, struct socket *, int); 627static int unix_getname(struct socket *, struct sockaddr *, int *, int); 628static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 629static unsigned int unix_dgram_poll(struct file *, struct socket *, 630 poll_table *); 631static int unix_ioctl(struct socket *, unsigned int, unsigned long); 632static int unix_shutdown(struct socket *, int); 633static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 634static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 635static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 636static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 637static int unix_dgram_connect(struct socket *, struct sockaddr *, 638 int, int); 639static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 640static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 641 int); 642 643static int unix_set_peek_off(struct sock *sk, int val) 644{ 645 struct unix_sock *u = unix_sk(sk); 646 647 if (mutex_lock_interruptible(&u->readlock)) 648 return -EINTR; 649 650 sk->sk_peek_off = val; 651 mutex_unlock(&u->readlock); 652 653 return 0; 654} 655 656 657static const struct proto_ops unix_stream_ops = { 658 .family = PF_UNIX, 659 .owner = THIS_MODULE, 660 .release = unix_release, 661 .bind = unix_bind, 662 .connect = unix_stream_connect, 663 .socketpair = unix_socketpair, 664 .accept = unix_accept, 665 .getname = unix_getname, 666 .poll = unix_poll, 667 .ioctl = unix_ioctl, 668 .listen = unix_listen, 669 .shutdown = unix_shutdown, 670 .setsockopt = sock_no_setsockopt, 671 .getsockopt = sock_no_getsockopt, 672 .sendmsg = unix_stream_sendmsg, 673 .recvmsg = unix_stream_recvmsg, 674 .mmap = sock_no_mmap, 675 .sendpage = sock_no_sendpage, 676 .set_peek_off = unix_set_peek_off, 677}; 678 679static const struct proto_ops unix_dgram_ops = { 680 .family = PF_UNIX, 681 .owner = THIS_MODULE, 682 .release = unix_release, 683 .bind = unix_bind, 684 .connect = unix_dgram_connect, 685 .socketpair = unix_socketpair, 686 .accept = sock_no_accept, 687 .getname = unix_getname, 688 .poll = unix_dgram_poll, 689 .ioctl = unix_ioctl, 690 .listen = sock_no_listen, 691 .shutdown = unix_shutdown, 692 .setsockopt = sock_no_setsockopt, 693 .getsockopt = sock_no_getsockopt, 694 .sendmsg = unix_dgram_sendmsg, 695 .recvmsg = unix_dgram_recvmsg, 696 .mmap = sock_no_mmap, 697 .sendpage = sock_no_sendpage, 698 .set_peek_off = unix_set_peek_off, 699}; 700 701static const struct proto_ops unix_seqpacket_ops = { 702 .family = PF_UNIX, 703 .owner = THIS_MODULE, 704 .release = unix_release, 705 .bind = unix_bind, 706 .connect = unix_stream_connect, 707 .socketpair = unix_socketpair, 708 .accept = unix_accept, 709 .getname = unix_getname, 710 .poll = unix_dgram_poll, 711 .ioctl = unix_ioctl, 712 .listen = unix_listen, 713 .shutdown = unix_shutdown, 714 .setsockopt = sock_no_setsockopt, 715 .getsockopt = sock_no_getsockopt, 716 .sendmsg = unix_seqpacket_sendmsg, 717 .recvmsg = unix_seqpacket_recvmsg, 718 .mmap = sock_no_mmap, 719 .sendpage = sock_no_sendpage, 720 .set_peek_off = unix_set_peek_off, 721}; 722 723static struct proto unix_proto = { 724 .name = "UNIX", 725 .owner = THIS_MODULE, 726 .obj_size = sizeof(struct unix_sock), 727}; 728 729/* 730 * AF_UNIX sockets do not interact with hardware, hence they 731 * dont trigger interrupts - so it's safe for them to have 732 * bh-unsafe locking for their sk_receive_queue.lock. Split off 733 * this special lock-class by reinitializing the spinlock key: 734 */ 735static struct lock_class_key af_unix_sk_receive_queue_lock_key; 736 737static struct sock *unix_create1(struct net *net, struct socket *sock) 738{ 739 struct sock *sk = NULL; 740 struct unix_sock *u; 741 742 atomic_long_inc(&unix_nr_socks); 743 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) 744 goto out; 745 746 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); 747 if (!sk) 748 goto out; 749 750 sock_init_data(sock, sk); 751 lockdep_set_class(&sk->sk_receive_queue.lock, 752 &af_unix_sk_receive_queue_lock_key); 753 754 sk->sk_write_space = unix_write_space; 755 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 756 sk->sk_destruct = unix_sock_destructor; 757 u = unix_sk(sk); 758 u->path.dentry = NULL; 759 u->path.mnt = NULL; 760 spin_lock_init(&u->lock); 761 atomic_long_set(&u->inflight, 0); 762 INIT_LIST_HEAD(&u->link); 763 mutex_init(&u->readlock); /* single task reading lock */ 764 init_waitqueue_head(&u->peer_wait); 765 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 766 unix_insert_socket(unix_sockets_unbound(sk), sk); 767out: 768 if (sk == NULL) 769 atomic_long_dec(&unix_nr_socks); 770 else { 771 local_bh_disable(); 772 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 773 local_bh_enable(); 774 } 775 return sk; 776} 777 778static int unix_create(struct net *net, struct socket *sock, int protocol, 779 int kern) 780{ 781 if (protocol && protocol != PF_UNIX) 782 return -EPROTONOSUPPORT; 783 784 sock->state = SS_UNCONNECTED; 785 786 switch (sock->type) { 787 case SOCK_STREAM: 788 sock->ops = &unix_stream_ops; 789 break; 790 /* 791 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 792 * nothing uses it. 793 */ 794 case SOCK_RAW: 795 sock->type = SOCK_DGRAM; 796 case SOCK_DGRAM: 797 sock->ops = &unix_dgram_ops; 798 break; 799 case SOCK_SEQPACKET: 800 sock->ops = &unix_seqpacket_ops; 801 break; 802 default: 803 return -ESOCKTNOSUPPORT; 804 } 805 806 return unix_create1(net, sock) ? 0 : -ENOMEM; 807} 808 809static int unix_release(struct socket *sock) 810{ 811 struct sock *sk = sock->sk; 812 813 if (!sk) 814 return 0; 815 816 unix_release_sock(sk, 0); 817 sock->sk = NULL; 818 819 return 0; 820} 821 822static int unix_autobind(struct socket *sock) 823{ 824 struct sock *sk = sock->sk; 825 struct net *net = sock_net(sk); 826 struct unix_sock *u = unix_sk(sk); 827 static u32 ordernum = 1; 828 struct unix_address *addr; 829 int err; 830 unsigned int retries = 0; 831 832 err = mutex_lock_interruptible(&u->readlock); 833 if (err) 834 return err; 835 836 err = 0; 837 if (u->addr) 838 goto out; 839 840 err = -ENOMEM; 841 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); 842 if (!addr) 843 goto out; 844 845 addr->name->sun_family = AF_UNIX; 846 atomic_set(&addr->refcnt, 1); 847 848retry: 849 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 850 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); 851 852 spin_lock(&unix_table_lock); 853 ordernum = (ordernum+1)&0xFFFFF; 854 855 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, 856 addr->hash)) { 857 spin_unlock(&unix_table_lock); 858 /* 859 * __unix_find_socket_byname() may take long time if many names 860 * are already in use. 861 */ 862 cond_resched(); 863 /* Give up if all names seems to be in use. */ 864 if (retries++ == 0xFFFFF) { 865 err = -ENOSPC; 866 kfree(addr); 867 goto out; 868 } 869 goto retry; 870 } 871 addr->hash ^= sk->sk_type; 872 873 __unix_remove_socket(sk); 874 u->addr = addr; 875 __unix_insert_socket(&unix_socket_table[addr->hash], sk); 876 spin_unlock(&unix_table_lock); 877 err = 0; 878 879out: mutex_unlock(&u->readlock); 880 return err; 881} 882 883static struct sock *unix_find_other(struct net *net, 884 struct sockaddr_un *sunname, int len, 885 int type, unsigned int hash, int *error) 886{ 887 struct sock *u; 888 struct path path; 889 int err = 0; 890 891 if (sunname->sun_path[0]) { 892 struct inode *inode; 893 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); 894 if (err) 895 goto fail; 896 inode = d_backing_inode(path.dentry); 897 err = inode_permission(inode, MAY_WRITE); 898 if (err) 899 goto put_fail; 900 901 err = -ECONNREFUSED; 902 if (!S_ISSOCK(inode->i_mode)) 903 goto put_fail; 904 u = unix_find_socket_byinode(inode); 905 if (!u) 906 goto put_fail; 907 908 if (u->sk_type == type) 909 touch_atime(&path); 910 911 path_put(&path); 912 913 err = -EPROTOTYPE; 914 if (u->sk_type != type) { 915 sock_put(u); 916 goto fail; 917 } 918 } else { 919 err = -ECONNREFUSED; 920 u = unix_find_socket_byname(net, sunname, len, type, hash); 921 if (u) { 922 struct dentry *dentry; 923 dentry = unix_sk(u)->path.dentry; 924 if (dentry) 925 touch_atime(&unix_sk(u)->path); 926 } else 927 goto fail; 928 } 929 return u; 930 931put_fail: 932 path_put(&path); 933fail: 934 *error = err; 935 return NULL; 936} 937 938static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) 939{ 940 struct dentry *dentry; 941 struct path path; 942 int err = 0; 943 /* 944 * Get the parent directory, calculate the hash for last 945 * component. 946 */ 947 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); 948 err = PTR_ERR(dentry); 949 if (IS_ERR(dentry)) 950 return err; 951 952 /* 953 * All right, let's create it. 954 */ 955 err = security_path_mknod(&path, dentry, mode, 0); 956 if (!err) { 957 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); 958 if (!err) { 959 res->mnt = mntget(path.mnt); 960 res->dentry = dget(dentry); 961 } 962 } 963 done_path_create(&path, dentry); 964 return err; 965} 966 967static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 968{ 969 struct sock *sk = sock->sk; 970 struct net *net = sock_net(sk); 971 struct unix_sock *u = unix_sk(sk); 972 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 973 char *sun_path = sunaddr->sun_path; 974 int err; 975 unsigned int hash; 976 struct unix_address *addr; 977 struct hlist_head *list; 978 979 err = -EINVAL; 980 if (sunaddr->sun_family != AF_UNIX) 981 goto out; 982 983 if (addr_len == sizeof(short)) { 984 err = unix_autobind(sock); 985 goto out; 986 } 987 988 err = unix_mkname(sunaddr, addr_len, &hash); 989 if (err < 0) 990 goto out; 991 addr_len = err; 992 993 err = mutex_lock_interruptible(&u->readlock); 994 if (err) 995 goto out; 996 997 err = -EINVAL; 998 if (u->addr) 999 goto out_up; 1000 1001 err = -ENOMEM; 1002 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); 1003 if (!addr) 1004 goto out_up; 1005 1006 memcpy(addr->name, sunaddr, addr_len); 1007 addr->len = addr_len; 1008 addr->hash = hash ^ sk->sk_type; 1009 atomic_set(&addr->refcnt, 1); 1010 1011 if (sun_path[0]) { 1012 struct path path; 1013 umode_t mode = S_IFSOCK | 1014 (SOCK_INODE(sock)->i_mode & ~current_umask()); 1015 err = unix_mknod(sun_path, mode, &path); 1016 if (err) { 1017 if (err == -EEXIST) 1018 err = -EADDRINUSE; 1019 unix_release_addr(addr); 1020 goto out_up; 1021 } 1022 addr->hash = UNIX_HASH_SIZE; 1023 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); 1024 spin_lock(&unix_table_lock); 1025 u->path = path; 1026 list = &unix_socket_table[hash]; 1027 } else { 1028 spin_lock(&unix_table_lock); 1029 err = -EADDRINUSE; 1030 if (__unix_find_socket_byname(net, sunaddr, addr_len, 1031 sk->sk_type, hash)) { 1032 unix_release_addr(addr); 1033 goto out_unlock; 1034 } 1035 1036 list = &unix_socket_table[addr->hash]; 1037 } 1038 1039 err = 0; 1040 __unix_remove_socket(sk); 1041 u->addr = addr; 1042 __unix_insert_socket(list, sk); 1043 1044out_unlock: 1045 spin_unlock(&unix_table_lock); 1046out_up: 1047 mutex_unlock(&u->readlock); 1048out: 1049 return err; 1050} 1051 1052static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1053{ 1054 if (unlikely(sk1 == sk2) || !sk2) { 1055 unix_state_lock(sk1); 1056 return; 1057 } 1058 if (sk1 < sk2) { 1059 unix_state_lock(sk1); 1060 unix_state_lock_nested(sk2); 1061 } else { 1062 unix_state_lock(sk2); 1063 unix_state_lock_nested(sk1); 1064 } 1065} 1066 1067static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1068{ 1069 if (unlikely(sk1 == sk2) || !sk2) { 1070 unix_state_unlock(sk1); 1071 return; 1072 } 1073 unix_state_unlock(sk1); 1074 unix_state_unlock(sk2); 1075} 1076 1077static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1078 int alen, int flags) 1079{ 1080 struct sock *sk = sock->sk; 1081 struct net *net = sock_net(sk); 1082 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1083 struct sock *other; 1084 unsigned int hash; 1085 int err; 1086 1087 if (addr->sa_family != AF_UNSPEC) { 1088 err = unix_mkname(sunaddr, alen, &hash); 1089 if (err < 0) 1090 goto out; 1091 alen = err; 1092 1093 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1094 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) 1095 goto out; 1096 1097restart: 1098 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); 1099 if (!other) 1100 goto out; 1101 1102 unix_state_double_lock(sk, other); 1103 1104 /* Apparently VFS overslept socket death. Retry. */ 1105 if (sock_flag(other, SOCK_DEAD)) { 1106 unix_state_double_unlock(sk, other); 1107 sock_put(other); 1108 goto restart; 1109 } 1110 1111 err = -EPERM; 1112 if (!unix_may_send(sk, other)) 1113 goto out_unlock; 1114 1115 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1116 if (err) 1117 goto out_unlock; 1118 1119 } else { 1120 /* 1121 * 1003.1g breaking connected state with AF_UNSPEC 1122 */ 1123 other = NULL; 1124 unix_state_double_lock(sk, other); 1125 } 1126 1127 /* 1128 * If it was connected, reconnect. 1129 */ 1130 if (unix_peer(sk)) { 1131 struct sock *old_peer = unix_peer(sk); 1132 unix_peer(sk) = other; 1133 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1134 1135 unix_state_double_unlock(sk, other); 1136 1137 if (other != old_peer) 1138 unix_dgram_disconnected(sk, old_peer); 1139 sock_put(old_peer); 1140 } else { 1141 unix_peer(sk) = other; 1142 unix_state_double_unlock(sk, other); 1143 } 1144 return 0; 1145 1146out_unlock: 1147 unix_state_double_unlock(sk, other); 1148 sock_put(other); 1149out: 1150 return err; 1151} 1152 1153static long unix_wait_for_peer(struct sock *other, long timeo) 1154{ 1155 struct unix_sock *u = unix_sk(other); 1156 int sched; 1157 DEFINE_WAIT(wait); 1158 1159 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1160 1161 sched = !sock_flag(other, SOCK_DEAD) && 1162 !(other->sk_shutdown & RCV_SHUTDOWN) && 1163 unix_recvq_full(other); 1164 1165 unix_state_unlock(other); 1166 1167 if (sched) 1168 timeo = schedule_timeout(timeo); 1169 1170 finish_wait(&u->peer_wait, &wait); 1171 return timeo; 1172} 1173 1174static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1175 int addr_len, int flags) 1176{ 1177 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1178 struct sock *sk = sock->sk; 1179 struct net *net = sock_net(sk); 1180 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1181 struct sock *newsk = NULL; 1182 struct sock *other = NULL; 1183 struct sk_buff *skb = NULL; 1184 unsigned int hash; 1185 int st; 1186 int err; 1187 long timeo; 1188 1189 err = unix_mkname(sunaddr, addr_len, &hash); 1190 if (err < 0) 1191 goto out; 1192 addr_len = err; 1193 1194 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && 1195 (err = unix_autobind(sock)) != 0) 1196 goto out; 1197 1198 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1199 1200 /* First of all allocate resources. 1201 If we will make it after state is locked, 1202 we will have to recheck all again in any case. 1203 */ 1204 1205 err = -ENOMEM; 1206 1207 /* create new sock for complete connection */ 1208 newsk = unix_create1(sock_net(sk), NULL); 1209 if (newsk == NULL) 1210 goto out; 1211 1212 /* Allocate skb for sending to listening sock */ 1213 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1214 if (skb == NULL) 1215 goto out; 1216 1217restart: 1218 /* Find listening sock. */ 1219 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); 1220 if (!other) 1221 goto out; 1222 1223 /* Latch state of peer */ 1224 unix_state_lock(other); 1225 1226 /* Apparently VFS overslept socket death. Retry. */ 1227 if (sock_flag(other, SOCK_DEAD)) { 1228 unix_state_unlock(other); 1229 sock_put(other); 1230 goto restart; 1231 } 1232 1233 err = -ECONNREFUSED; 1234 if (other->sk_state != TCP_LISTEN) 1235 goto out_unlock; 1236 if (other->sk_shutdown & RCV_SHUTDOWN) 1237 goto out_unlock; 1238 1239 if (unix_recvq_full(other)) { 1240 err = -EAGAIN; 1241 if (!timeo) 1242 goto out_unlock; 1243 1244 timeo = unix_wait_for_peer(other, timeo); 1245 1246 err = sock_intr_errno(timeo); 1247 if (signal_pending(current)) 1248 goto out; 1249 sock_put(other); 1250 goto restart; 1251 } 1252 1253 /* Latch our state. 1254 1255 It is tricky place. We need to grab our state lock and cannot 1256 drop lock on peer. It is dangerous because deadlock is 1257 possible. Connect to self case and simultaneous 1258 attempt to connect are eliminated by checking socket 1259 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1260 check this before attempt to grab lock. 1261 1262 Well, and we have to recheck the state after socket locked. 1263 */ 1264 st = sk->sk_state; 1265 1266 switch (st) { 1267 case TCP_CLOSE: 1268 /* This is ok... continue with connect */ 1269 break; 1270 case TCP_ESTABLISHED: 1271 /* Socket is already connected */ 1272 err = -EISCONN; 1273 goto out_unlock; 1274 default: 1275 err = -EINVAL; 1276 goto out_unlock; 1277 } 1278 1279 unix_state_lock_nested(sk); 1280 1281 if (sk->sk_state != st) { 1282 unix_state_unlock(sk); 1283 unix_state_unlock(other); 1284 sock_put(other); 1285 goto restart; 1286 } 1287 1288 err = security_unix_stream_connect(sk, other, newsk); 1289 if (err) { 1290 unix_state_unlock(sk); 1291 goto out_unlock; 1292 } 1293 1294 /* The way is open! Fastly set all the necessary fields... */ 1295 1296 sock_hold(sk); 1297 unix_peer(newsk) = sk; 1298 newsk->sk_state = TCP_ESTABLISHED; 1299 newsk->sk_type = sk->sk_type; 1300 init_peercred(newsk); 1301 newu = unix_sk(newsk); 1302 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1303 otheru = unix_sk(other); 1304 1305 /* copy address information from listening to new sock*/ 1306 if (otheru->addr) { 1307 atomic_inc(&otheru->addr->refcnt); 1308 newu->addr = otheru->addr; 1309 } 1310 if (otheru->path.dentry) { 1311 path_get(&otheru->path); 1312 newu->path = otheru->path; 1313 } 1314 1315 /* Set credentials */ 1316 copy_peercred(sk, other); 1317 1318 sock->state = SS_CONNECTED; 1319 sk->sk_state = TCP_ESTABLISHED; 1320 sock_hold(newsk); 1321 1322 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1323 unix_peer(sk) = newsk; 1324 1325 unix_state_unlock(sk); 1326 1327 /* take ten and and send info to listening sock */ 1328 spin_lock(&other->sk_receive_queue.lock); 1329 __skb_queue_tail(&other->sk_receive_queue, skb); 1330 spin_unlock(&other->sk_receive_queue.lock); 1331 unix_state_unlock(other); 1332 other->sk_data_ready(other); 1333 sock_put(other); 1334 return 0; 1335 1336out_unlock: 1337 if (other) 1338 unix_state_unlock(other); 1339 1340out: 1341 kfree_skb(skb); 1342 if (newsk) 1343 unix_release_sock(newsk, 0); 1344 if (other) 1345 sock_put(other); 1346 return err; 1347} 1348 1349static int unix_socketpair(struct socket *socka, struct socket *sockb) 1350{ 1351 struct sock *ska = socka->sk, *skb = sockb->sk; 1352 1353 /* Join our sockets back to back */ 1354 sock_hold(ska); 1355 sock_hold(skb); 1356 unix_peer(ska) = skb; 1357 unix_peer(skb) = ska; 1358 init_peercred(ska); 1359 init_peercred(skb); 1360 1361 if (ska->sk_type != SOCK_DGRAM) { 1362 ska->sk_state = TCP_ESTABLISHED; 1363 skb->sk_state = TCP_ESTABLISHED; 1364 socka->state = SS_CONNECTED; 1365 sockb->state = SS_CONNECTED; 1366 } 1367 return 0; 1368} 1369 1370static void unix_sock_inherit_flags(const struct socket *old, 1371 struct socket *new) 1372{ 1373 if (test_bit(SOCK_PASSCRED, &old->flags)) 1374 set_bit(SOCK_PASSCRED, &new->flags); 1375 if (test_bit(SOCK_PASSSEC, &old->flags)) 1376 set_bit(SOCK_PASSSEC, &new->flags); 1377} 1378 1379static int unix_accept(struct socket *sock, struct socket *newsock, int flags) 1380{ 1381 struct sock *sk = sock->sk; 1382 struct sock *tsk; 1383 struct sk_buff *skb; 1384 int err; 1385 1386 err = -EOPNOTSUPP; 1387 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1388 goto out; 1389 1390 err = -EINVAL; 1391 if (sk->sk_state != TCP_LISTEN) 1392 goto out; 1393 1394 /* If socket state is TCP_LISTEN it cannot change (for now...), 1395 * so that no locks are necessary. 1396 */ 1397 1398 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); 1399 if (!skb) { 1400 /* This means receive shutdown. */ 1401 if (err == 0) 1402 err = -EINVAL; 1403 goto out; 1404 } 1405 1406 tsk = skb->sk; 1407 skb_free_datagram(sk, skb); 1408 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1409 1410 /* attach accepted sock to socket */ 1411 unix_state_lock(tsk); 1412 newsock->state = SS_CONNECTED; 1413 unix_sock_inherit_flags(sock, newsock); 1414 sock_graft(tsk, newsock); 1415 unix_state_unlock(tsk); 1416 return 0; 1417 1418out: 1419 return err; 1420} 1421 1422 1423static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) 1424{ 1425 struct sock *sk = sock->sk; 1426 struct unix_sock *u; 1427 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1428 int err = 0; 1429 1430 if (peer) { 1431 sk = unix_peer_get(sk); 1432 1433 err = -ENOTCONN; 1434 if (!sk) 1435 goto out; 1436 err = 0; 1437 } else { 1438 sock_hold(sk); 1439 } 1440 1441 u = unix_sk(sk); 1442 unix_state_lock(sk); 1443 if (!u->addr) { 1444 sunaddr->sun_family = AF_UNIX; 1445 sunaddr->sun_path[0] = 0; 1446 *uaddr_len = sizeof(short); 1447 } else { 1448 struct unix_address *addr = u->addr; 1449 1450 *uaddr_len = addr->len; 1451 memcpy(sunaddr, addr->name, *uaddr_len); 1452 } 1453 unix_state_unlock(sk); 1454 sock_put(sk); 1455out: 1456 return err; 1457} 1458 1459static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1460{ 1461 int i; 1462 1463 scm->fp = UNIXCB(skb).fp; 1464 UNIXCB(skb).fp = NULL; 1465 1466 for (i = scm->fp->count-1; i >= 0; i--) 1467 unix_notinflight(scm->fp->user, scm->fp->fp[i]); 1468} 1469 1470static void unix_destruct_scm(struct sk_buff *skb) 1471{ 1472 struct scm_cookie scm; 1473 memset(&scm, 0, sizeof(scm)); 1474 scm.pid = UNIXCB(skb).pid; 1475 if (UNIXCB(skb).fp) 1476 unix_detach_fds(&scm, skb); 1477 1478 /* Alas, it calls VFS */ 1479 /* So fscking what? fput() had been SMP-safe since the last Summer */ 1480 scm_destroy(&scm); 1481 sock_wfree(skb); 1482} 1483 1484/* 1485 * The "user->unix_inflight" variable is protected by the garbage 1486 * collection lock, and we just read it locklessly here. If you go 1487 * over the limit, there might be a tiny race in actually noticing 1488 * it across threads. Tough. 1489 */ 1490static inline bool too_many_unix_fds(struct task_struct *p) 1491{ 1492 struct user_struct *user = current_user(); 1493 1494 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) 1495 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); 1496 return false; 1497} 1498 1499#define MAX_RECURSION_LEVEL 4 1500 1501static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1502{ 1503 int i; 1504 unsigned char max_level = 0; 1505 int unix_sock_count = 0; 1506 1507 if (too_many_unix_fds(current)) 1508 return -ETOOMANYREFS; 1509 1510 for (i = scm->fp->count - 1; i >= 0; i--) { 1511 struct sock *sk = unix_get_socket(scm->fp->fp[i]); 1512 1513 if (sk) { 1514 unix_sock_count++; 1515 max_level = max(max_level, 1516 unix_sk(sk)->recursion_level); 1517 } 1518 } 1519 if (unlikely(max_level > MAX_RECURSION_LEVEL)) 1520 return -ETOOMANYREFS; 1521 1522 /* 1523 * Need to duplicate file references for the sake of garbage 1524 * collection. Otherwise a socket in the fps might become a 1525 * candidate for GC while the skb is not yet queued. 1526 */ 1527 UNIXCB(skb).fp = scm_fp_dup(scm->fp); 1528 if (!UNIXCB(skb).fp) 1529 return -ENOMEM; 1530 1531 for (i = scm->fp->count - 1; i >= 0; i--) 1532 unix_inflight(scm->fp->user, scm->fp->fp[i]); 1533 return max_level; 1534} 1535 1536static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1537{ 1538 int err = 0; 1539 1540 UNIXCB(skb).pid = get_pid(scm->pid); 1541 UNIXCB(skb).uid = scm->creds.uid; 1542 UNIXCB(skb).gid = scm->creds.gid; 1543 UNIXCB(skb).fp = NULL; 1544 if (scm->fp && send_fds) 1545 err = unix_attach_fds(scm, skb); 1546 1547 skb->destructor = unix_destruct_scm; 1548 return err; 1549} 1550 1551/* 1552 * Some apps rely on write() giving SCM_CREDENTIALS 1553 * We include credentials if source or destination socket 1554 * asserted SOCK_PASSCRED. 1555 */ 1556static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1557 const struct sock *other) 1558{ 1559 if (UNIXCB(skb).pid) 1560 return; 1561 if (test_bit(SOCK_PASSCRED, &sock->flags) || 1562 !other->sk_socket || 1563 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { 1564 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1565 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1566 } 1567} 1568 1569/* 1570 * Send AF_UNIX data. 1571 */ 1572 1573static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1574 size_t len) 1575{ 1576 struct sock *sk = sock->sk; 1577 struct net *net = sock_net(sk); 1578 struct unix_sock *u = unix_sk(sk); 1579 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1580 struct sock *other = NULL; 1581 int namelen = 0; /* fake GCC */ 1582 int err; 1583 unsigned int hash; 1584 struct sk_buff *skb; 1585 long timeo; 1586 struct scm_cookie scm; 1587 int max_level; 1588 int data_len = 0; 1589 int sk_locked; 1590 1591 wait_for_unix_gc(); 1592 err = scm_send(sock, msg, &scm, false); 1593 if (err < 0) 1594 return err; 1595 1596 err = -EOPNOTSUPP; 1597 if (msg->msg_flags&MSG_OOB) 1598 goto out; 1599 1600 if (msg->msg_namelen) { 1601 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1602 if (err < 0) 1603 goto out; 1604 namelen = err; 1605 } else { 1606 sunaddr = NULL; 1607 err = -ENOTCONN; 1608 other = unix_peer_get(sk); 1609 if (!other) 1610 goto out; 1611 } 1612 1613 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr 1614 && (err = unix_autobind(sock)) != 0) 1615 goto out; 1616 1617 err = -EMSGSIZE; 1618 if (len > sk->sk_sndbuf - 32) 1619 goto out; 1620 1621 if (len > SKB_MAX_ALLOC) { 1622 data_len = min_t(size_t, 1623 len - SKB_MAX_ALLOC, 1624 MAX_SKB_FRAGS * PAGE_SIZE); 1625 data_len = PAGE_ALIGN(data_len); 1626 1627 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1628 } 1629 1630 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1631 msg->msg_flags & MSG_DONTWAIT, &err, 1632 PAGE_ALLOC_COSTLY_ORDER); 1633 if (skb == NULL) 1634 goto out; 1635 1636 err = unix_scm_to_skb(&scm, skb, true); 1637 if (err < 0) 1638 goto out_free; 1639 max_level = err + 1; 1640 unix_get_secdata(&scm, skb); 1641 1642 skb_put(skb, len - data_len); 1643 skb->data_len = data_len; 1644 skb->len = len; 1645 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1646 if (err) 1647 goto out_free; 1648 1649 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1650 1651restart: 1652 if (!other) { 1653 err = -ECONNRESET; 1654 if (sunaddr == NULL) 1655 goto out_free; 1656 1657 other = unix_find_other(net, sunaddr, namelen, sk->sk_type, 1658 hash, &err); 1659 if (other == NULL) 1660 goto out_free; 1661 } 1662 1663 if (sk_filter(other, skb) < 0) { 1664 /* Toss the packet but do not return any error to the sender */ 1665 err = len; 1666 goto out_free; 1667 } 1668 1669 sk_locked = 0; 1670 unix_state_lock(other); 1671restart_locked: 1672 err = -EPERM; 1673 if (!unix_may_send(sk, other)) 1674 goto out_unlock; 1675 1676 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1677 /* 1678 * Check with 1003.1g - what should 1679 * datagram error 1680 */ 1681 unix_state_unlock(other); 1682 sock_put(other); 1683 1684 if (!sk_locked) 1685 unix_state_lock(sk); 1686 1687 err = 0; 1688 if (unix_peer(sk) == other) { 1689 unix_peer(sk) = NULL; 1690 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 1691 1692 unix_state_unlock(sk); 1693 1694 unix_dgram_disconnected(sk, other); 1695 sock_put(other); 1696 err = -ECONNREFUSED; 1697 } else { 1698 unix_state_unlock(sk); 1699 } 1700 1701 other = NULL; 1702 if (err) 1703 goto out_free; 1704 goto restart; 1705 } 1706 1707 err = -EPIPE; 1708 if (other->sk_shutdown & RCV_SHUTDOWN) 1709 goto out_unlock; 1710 1711 if (sk->sk_type != SOCK_SEQPACKET) { 1712 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1713 if (err) 1714 goto out_unlock; 1715 } 1716 1717 /* other == sk && unix_peer(other) != sk if 1718 * - unix_peer(sk) == NULL, destination address bound to sk 1719 * - unix_peer(sk) == sk by time of get but disconnected before lock 1720 */ 1721 if (other != sk && 1722 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { 1723 if (timeo) { 1724 timeo = unix_wait_for_peer(other, timeo); 1725 1726 err = sock_intr_errno(timeo); 1727 if (signal_pending(current)) 1728 goto out_free; 1729 1730 goto restart; 1731 } 1732 1733 if (!sk_locked) { 1734 unix_state_unlock(other); 1735 unix_state_double_lock(sk, other); 1736 } 1737 1738 if (unix_peer(sk) != other || 1739 unix_dgram_peer_wake_me(sk, other)) { 1740 err = -EAGAIN; 1741 sk_locked = 1; 1742 goto out_unlock; 1743 } 1744 1745 if (!sk_locked) { 1746 sk_locked = 1; 1747 goto restart_locked; 1748 } 1749 } 1750 1751 if (unlikely(sk_locked)) 1752 unix_state_unlock(sk); 1753 1754 if (sock_flag(other, SOCK_RCVTSTAMP)) 1755 __net_timestamp(skb); 1756 maybe_add_creds(skb, sock, other); 1757 skb_queue_tail(&other->sk_receive_queue, skb); 1758 if (max_level > unix_sk(other)->recursion_level) 1759 unix_sk(other)->recursion_level = max_level; 1760 unix_state_unlock(other); 1761 other->sk_data_ready(other); 1762 sock_put(other); 1763 scm_destroy(&scm); 1764 return len; 1765 1766out_unlock: 1767 if (sk_locked) 1768 unix_state_unlock(sk); 1769 unix_state_unlock(other); 1770out_free: 1771 kfree_skb(skb); 1772out: 1773 if (other) 1774 sock_put(other); 1775 scm_destroy(&scm); 1776 return err; 1777} 1778 1779/* We use paged skbs for stream sockets, and limit occupancy to 32768 1780 * bytes, and a minimun of a full page. 1781 */ 1782#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 1783 1784static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 1785 size_t len) 1786{ 1787 struct sock *sk = sock->sk; 1788 struct sock *other = NULL; 1789 int err, size; 1790 struct sk_buff *skb; 1791 int sent = 0; 1792 struct scm_cookie scm; 1793 bool fds_sent = false; 1794 int max_level; 1795 int data_len; 1796 1797 wait_for_unix_gc(); 1798 err = scm_send(sock, msg, &scm, false); 1799 if (err < 0) 1800 return err; 1801 1802 err = -EOPNOTSUPP; 1803 if (msg->msg_flags&MSG_OOB) 1804 goto out_err; 1805 1806 if (msg->msg_namelen) { 1807 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 1808 goto out_err; 1809 } else { 1810 err = -ENOTCONN; 1811 other = unix_peer(sk); 1812 if (!other) 1813 goto out_err; 1814 } 1815 1816 if (sk->sk_shutdown & SEND_SHUTDOWN) 1817 goto pipe_err; 1818 1819 while (sent < len) { 1820 size = len - sent; 1821 1822 /* Keep two messages in the pipe so it schedules better */ 1823 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 1824 1825 /* allow fallback to order-0 allocations */ 1826 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 1827 1828 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 1829 1830 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 1831 1832 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 1833 msg->msg_flags & MSG_DONTWAIT, &err, 1834 get_order(UNIX_SKB_FRAGS_SZ)); 1835 if (!skb) 1836 goto out_err; 1837 1838 /* Only send the fds in the first buffer */ 1839 err = unix_scm_to_skb(&scm, skb, !fds_sent); 1840 if (err < 0) { 1841 kfree_skb(skb); 1842 goto out_err; 1843 } 1844 max_level = err + 1; 1845 fds_sent = true; 1846 1847 skb_put(skb, size - data_len); 1848 skb->data_len = data_len; 1849 skb->len = size; 1850 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 1851 if (err) { 1852 kfree_skb(skb); 1853 goto out_err; 1854 } 1855 1856 unix_state_lock(other); 1857 1858 if (sock_flag(other, SOCK_DEAD) || 1859 (other->sk_shutdown & RCV_SHUTDOWN)) 1860 goto pipe_err_free; 1861 1862 maybe_add_creds(skb, sock, other); 1863 skb_queue_tail(&other->sk_receive_queue, skb); 1864 if (max_level > unix_sk(other)->recursion_level) 1865 unix_sk(other)->recursion_level = max_level; 1866 unix_state_unlock(other); 1867 other->sk_data_ready(other); 1868 sent += size; 1869 } 1870 1871 scm_destroy(&scm); 1872 1873 return sent; 1874 1875pipe_err_free: 1876 unix_state_unlock(other); 1877 kfree_skb(skb); 1878pipe_err: 1879 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 1880 send_sig(SIGPIPE, current, 0); 1881 err = -EPIPE; 1882out_err: 1883 scm_destroy(&scm); 1884 return sent ? : err; 1885} 1886 1887static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 1888 size_t len) 1889{ 1890 int err; 1891 struct sock *sk = sock->sk; 1892 1893 err = sock_error(sk); 1894 if (err) 1895 return err; 1896 1897 if (sk->sk_state != TCP_ESTABLISHED) 1898 return -ENOTCONN; 1899 1900 if (msg->msg_namelen) 1901 msg->msg_namelen = 0; 1902 1903 return unix_dgram_sendmsg(sock, msg, len); 1904} 1905 1906static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 1907 size_t size, int flags) 1908{ 1909 struct sock *sk = sock->sk; 1910 1911 if (sk->sk_state != TCP_ESTABLISHED) 1912 return -ENOTCONN; 1913 1914 return unix_dgram_recvmsg(sock, msg, size, flags); 1915} 1916 1917static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 1918{ 1919 struct unix_sock *u = unix_sk(sk); 1920 1921 if (u->addr) { 1922 msg->msg_namelen = u->addr->len; 1923 memcpy(msg->msg_name, u->addr->name, u->addr->len); 1924 } 1925} 1926 1927static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, 1928 size_t size, int flags) 1929{ 1930 struct scm_cookie scm; 1931 struct sock *sk = sock->sk; 1932 struct unix_sock *u = unix_sk(sk); 1933 int noblock = flags & MSG_DONTWAIT; 1934 struct sk_buff *skb; 1935 int err; 1936 int peeked, skip; 1937 1938 err = -EOPNOTSUPP; 1939 if (flags&MSG_OOB) 1940 goto out; 1941 1942 err = mutex_lock_interruptible(&u->readlock); 1943 if (unlikely(err)) { 1944 /* recvmsg() in non blocking mode is supposed to return -EAGAIN 1945 * sk_rcvtimeo is not honored by mutex_lock_interruptible() 1946 */ 1947 err = noblock ? -EAGAIN : -ERESTARTSYS; 1948 goto out; 1949 } 1950 1951 skip = sk_peek_offset(sk, flags); 1952 1953 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); 1954 if (!skb) { 1955 unix_state_lock(sk); 1956 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 1957 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 1958 (sk->sk_shutdown & RCV_SHUTDOWN)) 1959 err = 0; 1960 unix_state_unlock(sk); 1961 goto out_unlock; 1962 } 1963 1964 wake_up_interruptible_sync_poll(&u->peer_wait, 1965 POLLOUT | POLLWRNORM | POLLWRBAND); 1966 1967 if (msg->msg_name) 1968 unix_copy_addr(msg, skb->sk); 1969 1970 if (size > skb->len - skip) 1971 size = skb->len - skip; 1972 else if (size < skb->len - skip) 1973 msg->msg_flags |= MSG_TRUNC; 1974 1975 err = skb_copy_datagram_msg(skb, skip, msg, size); 1976 if (err) 1977 goto out_free; 1978 1979 if (sock_flag(sk, SOCK_RCVTSTAMP)) 1980 __sock_recv_timestamp(msg, sk, skb); 1981 1982 memset(&scm, 0, sizeof(scm)); 1983 1984 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 1985 unix_set_secdata(&scm, skb); 1986 1987 if (!(flags & MSG_PEEK)) { 1988 if (UNIXCB(skb).fp) 1989 unix_detach_fds(&scm, skb); 1990 1991 sk_peek_offset_bwd(sk, skb->len); 1992 } else { 1993 /* It is questionable: on PEEK we could: 1994 - do not return fds - good, but too simple 8) 1995 - return fds, and do not return them on read (old strategy, 1996 apparently wrong) 1997 - clone fds (I chose it for now, it is the most universal 1998 solution) 1999 2000 POSIX 1003.1g does not actually define this clearly 2001 at all. POSIX 1003.1g doesn't define a lot of things 2002 clearly however! 2003 2004 */ 2005 2006 sk_peek_offset_fwd(sk, size); 2007 2008 if (UNIXCB(skb).fp) 2009 scm.fp = scm_fp_dup(UNIXCB(skb).fp); 2010 } 2011 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2012 2013 scm_recv(sock, msg, &scm, flags); 2014 2015out_free: 2016 skb_free_datagram(sk, skb); 2017out_unlock: 2018 mutex_unlock(&u->readlock); 2019out: 2020 return err; 2021} 2022 2023/* 2024 * Sleep until more data has arrived. But check for races.. 2025 */ 2026static long unix_stream_data_wait(struct sock *sk, long timeo, 2027 struct sk_buff *last) 2028{ 2029 DEFINE_WAIT(wait); 2030 2031 unix_state_lock(sk); 2032 2033 for (;;) { 2034 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2035 2036 if (skb_peek_tail(&sk->sk_receive_queue) != last || 2037 sk->sk_err || 2038 (sk->sk_shutdown & RCV_SHUTDOWN) || 2039 signal_pending(current) || 2040 !timeo) 2041 break; 2042 2043 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2044 unix_state_unlock(sk); 2045 timeo = freezable_schedule_timeout(timeo); 2046 unix_state_lock(sk); 2047 2048 if (sock_flag(sk, SOCK_DEAD)) 2049 break; 2050 2051 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2052 } 2053 2054 finish_wait(sk_sleep(sk), &wait); 2055 unix_state_unlock(sk); 2056 return timeo; 2057} 2058 2059static unsigned int unix_skb_len(const struct sk_buff *skb) 2060{ 2061 return skb->len - UNIXCB(skb).consumed; 2062} 2063 2064static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2065 size_t size, int flags) 2066{ 2067 struct scm_cookie scm; 2068 struct sock *sk = sock->sk; 2069 struct unix_sock *u = unix_sk(sk); 2070 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 2071 int copied = 0; 2072 int noblock = flags & MSG_DONTWAIT; 2073 int check_creds = 0; 2074 int target; 2075 int err = 0; 2076 long timeo; 2077 int skip; 2078 2079 err = -EINVAL; 2080 if (sk->sk_state != TCP_ESTABLISHED) 2081 goto out; 2082 2083 err = -EOPNOTSUPP; 2084 if (flags&MSG_OOB) 2085 goto out; 2086 2087 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); 2088 timeo = sock_rcvtimeo(sk, noblock); 2089 2090 /* Lock the socket to prevent queue disordering 2091 * while sleeps in memcpy_tomsg 2092 */ 2093 2094 memset(&scm, 0, sizeof(scm)); 2095 2096 mutex_lock(&u->readlock); 2097 2098 if (flags & MSG_PEEK) 2099 skip = sk_peek_offset(sk, flags); 2100 else 2101 skip = 0; 2102 2103 do { 2104 int chunk; 2105 struct sk_buff *skb, *last; 2106 2107 unix_state_lock(sk); 2108 if (sock_flag(sk, SOCK_DEAD)) { 2109 err = -ECONNRESET; 2110 goto unlock; 2111 } 2112 last = skb = skb_peek(&sk->sk_receive_queue); 2113again: 2114 if (skb == NULL) { 2115 unix_sk(sk)->recursion_level = 0; 2116 if (copied >= target) 2117 goto unlock; 2118 2119 /* 2120 * POSIX 1003.1g mandates this order. 2121 */ 2122 2123 err = sock_error(sk); 2124 if (err) 2125 goto unlock; 2126 if (sk->sk_shutdown & RCV_SHUTDOWN) 2127 goto unlock; 2128 2129 unix_state_unlock(sk); 2130 err = -EAGAIN; 2131 if (!timeo) 2132 break; 2133 mutex_unlock(&u->readlock); 2134 2135 timeo = unix_stream_data_wait(sk, timeo, last); 2136 2137 if (signal_pending(current)) { 2138 err = sock_intr_errno(timeo); 2139 scm_destroy(&scm); 2140 goto out; 2141 } 2142 2143 mutex_lock(&u->readlock); 2144 continue; 2145 unlock: 2146 unix_state_unlock(sk); 2147 break; 2148 } 2149 2150 while (skip >= unix_skb_len(skb)) { 2151 skip -= unix_skb_len(skb); 2152 last = skb; 2153 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2154 if (!skb) 2155 goto again; 2156 } 2157 2158 unix_state_unlock(sk); 2159 2160 if (check_creds) { 2161 /* Never glue messages from different writers */ 2162 if ((UNIXCB(skb).pid != scm.pid) || 2163 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || 2164 !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) 2165 break; 2166 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2167 /* Copy credentials */ 2168 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2169 check_creds = 1; 2170 } 2171 2172 /* Copy address just once */ 2173 if (sunaddr) { 2174 unix_copy_addr(msg, skb->sk); 2175 sunaddr = NULL; 2176 } 2177 2178 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2179 if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2180 msg, chunk)) { 2181 if (copied == 0) 2182 copied = -EFAULT; 2183 break; 2184 } 2185 copied += chunk; 2186 size -= chunk; 2187 2188 /* Mark read part of skb as used */ 2189 if (!(flags & MSG_PEEK)) { 2190 UNIXCB(skb).consumed += chunk; 2191 2192 sk_peek_offset_bwd(sk, chunk); 2193 2194 if (UNIXCB(skb).fp) 2195 unix_detach_fds(&scm, skb); 2196 2197 if (unix_skb_len(skb)) 2198 break; 2199 2200 skb_unlink(skb, &sk->sk_receive_queue); 2201 consume_skb(skb); 2202 2203 if (scm.fp) 2204 break; 2205 } else { 2206 /* It is questionable, see note in unix_dgram_recvmsg. 2207 */ 2208 if (UNIXCB(skb).fp) 2209 scm.fp = scm_fp_dup(UNIXCB(skb).fp); 2210 2211 sk_peek_offset_fwd(sk, chunk); 2212 2213 if (UNIXCB(skb).fp) 2214 break; 2215 2216 skip = 0; 2217 last = skb; 2218 unix_state_lock(sk); 2219 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2220 if (skb) 2221 goto again; 2222 unix_state_unlock(sk); 2223 break; 2224 } 2225 } while (size); 2226 2227 mutex_unlock(&u->readlock); 2228 scm_recv(sock, msg, &scm, flags); 2229out: 2230 return copied ? : err; 2231} 2232 2233static int unix_shutdown(struct socket *sock, int mode) 2234{ 2235 struct sock *sk = sock->sk; 2236 struct sock *other; 2237 2238 if (mode < SHUT_RD || mode > SHUT_RDWR) 2239 return -EINVAL; 2240 /* This maps: 2241 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2242 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 2243 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2244 */ 2245 ++mode; 2246 2247 unix_state_lock(sk); 2248 sk->sk_shutdown |= mode; 2249 other = unix_peer(sk); 2250 if (other) 2251 sock_hold(other); 2252 unix_state_unlock(sk); 2253 sk->sk_state_change(sk); 2254 2255 if (other && 2256 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2257 2258 int peer_mode = 0; 2259 2260 if (mode&RCV_SHUTDOWN) 2261 peer_mode |= SEND_SHUTDOWN; 2262 if (mode&SEND_SHUTDOWN) 2263 peer_mode |= RCV_SHUTDOWN; 2264 unix_state_lock(other); 2265 other->sk_shutdown |= peer_mode; 2266 unix_state_unlock(other); 2267 other->sk_state_change(other); 2268 if (peer_mode == SHUTDOWN_MASK) 2269 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2270 else if (peer_mode & RCV_SHUTDOWN) 2271 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2272 } 2273 if (other) 2274 sock_put(other); 2275 2276 return 0; 2277} 2278 2279long unix_inq_len(struct sock *sk) 2280{ 2281 struct sk_buff *skb; 2282 long amount = 0; 2283 2284 if (sk->sk_state == TCP_LISTEN) 2285 return -EINVAL; 2286 2287 spin_lock(&sk->sk_receive_queue.lock); 2288 if (sk->sk_type == SOCK_STREAM || 2289 sk->sk_type == SOCK_SEQPACKET) { 2290 skb_queue_walk(&sk->sk_receive_queue, skb) 2291 amount += unix_skb_len(skb); 2292 } else { 2293 skb = skb_peek(&sk->sk_receive_queue); 2294 if (skb) 2295 amount = skb->len; 2296 } 2297 spin_unlock(&sk->sk_receive_queue.lock); 2298 2299 return amount; 2300} 2301EXPORT_SYMBOL_GPL(unix_inq_len); 2302 2303long unix_outq_len(struct sock *sk) 2304{ 2305 return sk_wmem_alloc_get(sk); 2306} 2307EXPORT_SYMBOL_GPL(unix_outq_len); 2308 2309static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 2310{ 2311 struct sock *sk = sock->sk; 2312 long amount = 0; 2313 int err; 2314 2315 switch (cmd) { 2316 case SIOCOUTQ: 2317 amount = unix_outq_len(sk); 2318 err = put_user(amount, (int __user *)arg); 2319 break; 2320 case SIOCINQ: 2321 amount = unix_inq_len(sk); 2322 if (amount < 0) 2323 err = amount; 2324 else 2325 err = put_user(amount, (int __user *)arg); 2326 break; 2327 default: 2328 err = -ENOIOCTLCMD; 2329 break; 2330 } 2331 return err; 2332} 2333 2334static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) 2335{ 2336 struct sock *sk = sock->sk; 2337 unsigned int mask; 2338 2339 sock_poll_wait(file, sk_sleep(sk), wait); 2340 mask = 0; 2341 2342 /* exceptional events? */ 2343 if (sk->sk_err) 2344 mask |= POLLERR; 2345 if (sk->sk_shutdown == SHUTDOWN_MASK) 2346 mask |= POLLHUP; 2347 if (sk->sk_shutdown & RCV_SHUTDOWN) 2348 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2349 2350 /* readable? */ 2351 if (!skb_queue_empty(&sk->sk_receive_queue)) 2352 mask |= POLLIN | POLLRDNORM; 2353 2354 /* Connection-based need to check for termination and startup */ 2355 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 2356 sk->sk_state == TCP_CLOSE) 2357 mask |= POLLHUP; 2358 2359 /* 2360 * we set writable also when the other side has shut down the 2361 * connection. This prevents stuck sockets. 2362 */ 2363 if (unix_writable(sk)) 2364 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2365 2366 return mask; 2367} 2368 2369static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, 2370 poll_table *wait) 2371{ 2372 struct sock *sk = sock->sk, *other; 2373 unsigned int mask, writable; 2374 2375 sock_poll_wait(file, sk_sleep(sk), wait); 2376 mask = 0; 2377 2378 /* exceptional events? */ 2379 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 2380 mask |= POLLERR | 2381 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); 2382 2383 if (sk->sk_shutdown & RCV_SHUTDOWN) 2384 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 2385 if (sk->sk_shutdown == SHUTDOWN_MASK) 2386 mask |= POLLHUP; 2387 2388 /* readable? */ 2389 if (!skb_queue_empty(&sk->sk_receive_queue)) 2390 mask |= POLLIN | POLLRDNORM; 2391 2392 /* Connection-based need to check for termination and startup */ 2393 if (sk->sk_type == SOCK_SEQPACKET) { 2394 if (sk->sk_state == TCP_CLOSE) 2395 mask |= POLLHUP; 2396 /* connection hasn't started yet? */ 2397 if (sk->sk_state == TCP_SYN_SENT) 2398 return mask; 2399 } 2400 2401 /* No write status requested, avoid expensive OUT tests. */ 2402 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT))) 2403 return mask; 2404 2405 writable = unix_writable(sk); 2406 if (writable) { 2407 unix_state_lock(sk); 2408 2409 other = unix_peer(sk); 2410 if (other && unix_peer(other) != sk && 2411 unix_recvq_full(other) && 2412 unix_dgram_peer_wake_me(sk, other)) 2413 writable = 0; 2414 2415 unix_state_unlock(sk); 2416 } 2417 2418 if (writable) 2419 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2420 else 2421 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 2422 2423 return mask; 2424} 2425 2426#ifdef CONFIG_PROC_FS 2427 2428#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 2429 2430#define get_bucket(x) ((x) >> BUCKET_SPACE) 2431#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) 2432#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 2433 2434static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 2435{ 2436 unsigned long offset = get_offset(*pos); 2437 unsigned long bucket = get_bucket(*pos); 2438 struct sock *sk; 2439 unsigned long count = 0; 2440 2441 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { 2442 if (sock_net(sk) != seq_file_net(seq)) 2443 continue; 2444 if (++count == offset) 2445 break; 2446 } 2447 2448 return sk; 2449} 2450 2451static struct sock *unix_next_socket(struct seq_file *seq, 2452 struct sock *sk, 2453 loff_t *pos) 2454{ 2455 unsigned long bucket; 2456 2457 while (sk > (struct sock *)SEQ_START_TOKEN) { 2458 sk = sk_next(sk); 2459 if (!sk) 2460 goto next_bucket; 2461 if (sock_net(sk) == seq_file_net(seq)) 2462 return sk; 2463 } 2464 2465 do { 2466 sk = unix_from_bucket(seq, pos); 2467 if (sk) 2468 return sk; 2469 2470next_bucket: 2471 bucket = get_bucket(*pos) + 1; 2472 *pos = set_bucket_offset(bucket, 1); 2473 } while (bucket < ARRAY_SIZE(unix_socket_table)); 2474 2475 return NULL; 2476} 2477 2478static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 2479 __acquires(unix_table_lock) 2480{ 2481 spin_lock(&unix_table_lock); 2482 2483 if (!*pos) 2484 return SEQ_START_TOKEN; 2485 2486 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) 2487 return NULL; 2488 2489 return unix_next_socket(seq, NULL, pos); 2490} 2491 2492static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2493{ 2494 ++*pos; 2495 return unix_next_socket(seq, v, pos); 2496} 2497 2498static void unix_seq_stop(struct seq_file *seq, void *v) 2499 __releases(unix_table_lock) 2500{ 2501 spin_unlock(&unix_table_lock); 2502} 2503 2504static int unix_seq_show(struct seq_file *seq, void *v) 2505{ 2506 2507 if (v == SEQ_START_TOKEN) 2508 seq_puts(seq, "Num RefCount Protocol Flags Type St " 2509 "Inode Path\n"); 2510 else { 2511 struct sock *s = v; 2512 struct unix_sock *u = unix_sk(s); 2513 unix_state_lock(s); 2514 2515 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 2516 s, 2517 atomic_read(&s->sk_refcnt), 2518 0, 2519 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 2520 s->sk_type, 2521 s->sk_socket ? 2522 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 2523 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), 2524 sock_i_ino(s)); 2525 2526 if (u->addr) { 2527 int i, len; 2528 seq_putc(seq, ' '); 2529 2530 i = 0; 2531 len = u->addr->len - sizeof(short); 2532 if (!UNIX_ABSTRACT(s)) 2533 len--; 2534 else { 2535 seq_putc(seq, '@'); 2536 i++; 2537 } 2538 for ( ; i < len; i++) 2539 seq_putc(seq, u->addr->name->sun_path[i]); 2540 } 2541 unix_state_unlock(s); 2542 seq_putc(seq, '\n'); 2543 } 2544 2545 return 0; 2546} 2547 2548static const struct seq_operations unix_seq_ops = { 2549 .start = unix_seq_start, 2550 .next = unix_seq_next, 2551 .stop = unix_seq_stop, 2552 .show = unix_seq_show, 2553}; 2554 2555static int unix_seq_open(struct inode *inode, struct file *file) 2556{ 2557 return seq_open_net(inode, file, &unix_seq_ops, 2558 sizeof(struct seq_net_private)); 2559} 2560 2561static const struct file_operations unix_seq_fops = { 2562 .owner = THIS_MODULE, 2563 .open = unix_seq_open, 2564 .read = seq_read, 2565 .llseek = seq_lseek, 2566 .release = seq_release_net, 2567}; 2568 2569#endif 2570 2571static const struct net_proto_family unix_family_ops = { 2572 .family = PF_UNIX, 2573 .create = unix_create, 2574 .owner = THIS_MODULE, 2575}; 2576 2577 2578static int __net_init unix_net_init(struct net *net) 2579{ 2580 int error = -ENOMEM; 2581 2582 net->unx.sysctl_max_dgram_qlen = 10; 2583 if (unix_sysctl_register(net)) 2584 goto out; 2585 2586#ifdef CONFIG_PROC_FS 2587 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) { 2588 unix_sysctl_unregister(net); 2589 goto out; 2590 } 2591#endif 2592 error = 0; 2593out: 2594 return error; 2595} 2596 2597static void __net_exit unix_net_exit(struct net *net) 2598{ 2599 unix_sysctl_unregister(net); 2600 remove_proc_entry("unix", net->proc_net); 2601} 2602 2603static struct pernet_operations unix_net_ops = { 2604 .init = unix_net_init, 2605 .exit = unix_net_exit, 2606}; 2607 2608static int __init af_unix_init(void) 2609{ 2610 int rc = -1; 2611 2612 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb)); 2613 2614 rc = proto_register(&unix_proto, 1); 2615 if (rc != 0) { 2616 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 2617 goto out; 2618 } 2619 2620 sock_register(&unix_family_ops); 2621 register_pernet_subsys(&unix_net_ops); 2622out: 2623 return rc; 2624} 2625 2626static void __exit af_unix_exit(void) 2627{ 2628 sock_unregister(PF_UNIX); 2629 proto_unregister(&unix_proto); 2630 unregister_pernet_subsys(&unix_net_ops); 2631} 2632 2633/* Earlier than device_initcall() so that other drivers invoking 2634 request_module() don't end up in a loop when modprobe tries 2635 to use a UNIX socket. But later than subsys_initcall() because 2636 we depend on stuff initialised there */ 2637fs_initcall(af_unix_init); 2638module_exit(af_unix_exit); 2639 2640MODULE_LICENSE("GPL"); 2641MODULE_ALIAS_NETPROTO(PF_UNIX); 2642