root/include/linux/scif.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Intel MIC Platform Software Stack (MPSS)
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * Copyright(c) 2014 Intel Corporation.
  10  *
  11  * This program is free software; you can redistribute it and/or modify
  12  * it under the terms of version 2 of the GNU General Public License as
  13  * published by the Free Software Foundation.
  14  *
  15  * This program is distributed in the hope that it will be useful, but
  16  * WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * General Public License for more details.
  19  *
  20  * BSD LICENSE
  21  *
  22  * Copyright(c) 2014 Intel Corporation.
  23  *
  24  * Redistribution and use in source and binary forms, with or without
  25  * modification, are permitted provided that the following conditions
  26  * are met:
  27  *
  28  * * Redistributions of source code must retain the above copyright
  29  *   notice, this list of conditions and the following disclaimer.
  30  * * Redistributions in binary form must reproduce the above copyright
  31  *   notice, this list of conditions and the following disclaimer in
  32  *   the documentation and/or other materials provided with the
  33  *   distribution.
  34  * * Neither the name of Intel Corporation nor the names of its
  35  *   contributors may be used to endorse or promote products derived
  36  *   from this software without specific prior written permission.
  37  *
  38  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  39  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  40  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  41  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  42  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  45  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  46  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  47  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  48  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  49  *
  50  * Intel SCIF driver.
  51  *
  52  */
  53 #ifndef __SCIF_H__
  54 #define __SCIF_H__
  55 
  56 #include <linux/types.h>
  57 #include <linux/poll.h>
  58 #include <linux/device.h>
  59 #include <linux/scif_ioctl.h>
  60 
  61 #define SCIF_ACCEPT_SYNC        1 /* scif_accept() flag: block until a connection request is presented */
  62 #define SCIF_SEND_BLOCK         1 /* scif_send() flag: block until the entire message is sent */
  63 #define SCIF_RECV_BLOCK         1 /* scif_recv() flag: block until the entire message is received */
  64 
  65 enum { /* prot_flags bits accepted by scif_register() */
  66         SCIF_PROT_READ = (1 << 0), /* allow read operations from the window */
  67         SCIF_PROT_WRITE = (1 << 1) /* allow write operations to the window */
  68 };
  69 
  70 enum { /* mapping flags; SCIF_MAP_FIXED is documented as a scif_register() map_flags bit */
  71         SCIF_MAP_FIXED = 0x10, /* place the window at exactly the requested offset */
  72         SCIF_MAP_KERNEL = 0x20, /* NOTE(review): not described in the visible docs - confirm meaning */
  73 };
  74 
  75 enum { /* single-bit flags; NOTE(review): presumably for scif_fence_signal() - confirm against its docs */
  76         SCIF_FENCE_INIT_SELF = (1 << 0),
  77         SCIF_FENCE_INIT_PEER = (1 << 1),
  78         SCIF_SIGNAL_LOCAL = (1 << 4),
  79         SCIF_SIGNAL_REMOTE = (1 << 5)
  80 };
  81 
  82 enum { /* rma_flags bits for RMA transfers such as scif_readfrom() */
  83         SCIF_RMA_USECPU = (1 << 0), /* transfer with the CPU instead of the DMA engine */
  84         SCIF_RMA_USECACHE = (1 << 1), /* NOTE(review): not described in the visible docs - confirm meaning */
  85         SCIF_RMA_SYNC = (1 << 2), /* return only after the transfer has completed */
  86         SCIF_RMA_ORDERED = (1 << 3) /* last (partial) cacheline of the source becomes visible last */
  87 };
  88 
  89 /* End of SCIF Admin Reserved Ports; scif_bind() documents ports below 1024 as bindable only by privileged processes */
  90 #define SCIF_ADMIN_PORT_END     1024
  91 
  92 /* End of SCIF Reserved Ports; scif_bind() with pn == 0 assigns a port >= SCIF_PORT_RSVD */
  93 #define SCIF_PORT_RSVD          1088
  94 
  95 typedef struct scif_endpt *scif_epd_t; /* endpoint descriptor handle used by the SCIF calls below */
  96 typedef struct scif_pinned_pages *scif_pinned_pages_t; /* NOTE(review): users not visible in this part of the header - confirm */
  97 
  98 /**
  99  * struct scif_range - SCIF registered range used in kernel mode
 100  * @cookie: cookie used internally by SCIF
 101  * @nr_pages: number of pages of PAGE_SIZE
 102  * @prot_flags: R/W protection
 103  * @phys_addr: Array of bus addresses (dma_addr_t)
 104  * @va: Array of kernel virtual addresses (__iomem pointers) backed by the
 105  *      pages in the phys_addr array. The va is populated only when called on
 106  *      the host for a remote SCIF connection on MIC. This is required to
 107  *      support the use case of DMA between MIC and another device which is
 108  *      not a SCIF node e.g., an IB or ethernet NIC.
 109  */
 110 struct scif_range {
 111         void *cookie;
 112         int nr_pages;
 113         int prot_flags;
 114         dma_addr_t *phys_addr;
 115         void __iomem **va;
 116 };
 117 
 118 /**
 119  * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll()
 120  * @epd: SCIF endpoint descriptor
 121  * @events: requested events (__poll_t)
 122  * @revents: returned events (__poll_t)
 123  */
 124 struct scif_pollepd {
 125         scif_epd_t epd;
 126         __poll_t events;
 127         __poll_t revents;
 128 };
 129 
 130 /**
 131  * struct scif_peer_dev - representation of a peer SCIF device
 132  *
 133  * Peer devices show up as PCIe devices for the mgmt node but not the cards.
 134  * The mgmt node discovers all the cards on the PCIe bus and informs the other
 135  * cards about their peers. Upon notification of a peer a node adds a peer
 136  * device to the peer bus to maintain symmetry in the way devices are
 137  * discovered across all nodes in the SCIF network.
 138  *
 139  * @dev: underlying device
 140  * @dnode: the destination node which this device will communicate with
 141  */
 142 struct scif_peer_dev {
 143         struct device dev;
 144         u8 dnode;
 145 };
 146 
 147 /**
 148  * struct scif_client - representation of a SCIF client
 149  * @name: client name
 150  * @probe: client method called when a peer device is registered
 151  * @remove: client method called when a peer device is unregistered
 152  * @si: subsys_interface used internally for implementing SCIF clients
 153  */
 154 struct scif_client {
 155         const char *name;
 156         void (*probe)(struct scif_peer_dev *spdev);
 157         void (*remove)(struct scif_peer_dev *spdev);
 158         struct subsys_interface si;
 159 };
 160 
 161 #define SCIF_OPEN_FAILED ((scif_epd_t)-1) /* user-mode failure return of scif_open() */
 162 #define SCIF_REGISTER_FAILED ((off_t)-1) /* user-mode failure return of scif_register() */
 163 #define SCIF_MMAP_FAILED ((void *)-1) /* NOTE(review): presumably the failure return of a SCIF mmap call not visible here - confirm */
 164 
 165 /**
 166  * scif_open() - Create an endpoint
 167  *
 168  * Return:
 169  * Upon successful completion, scif_open() returns an endpoint descriptor to
 170  * be used in subsequent SCIF function calls to refer to that endpoint;
 171  * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
 172  * returned and errno is set to indicate the error; in kernel mode a NULL
 173  * scif_epd_t is returned.
 174  *
 175  * Errors:
 176  * ENOMEM - Insufficient kernel memory was available
 177  */
 178 scif_epd_t scif_open(void);
 179 
 180 /**
 181  * scif_bind() - Bind an endpoint to a port
 182  * @epd:        endpoint descriptor
 183  * @pn:         port number
 184  *
 185  * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
 186  * local node. If pn is zero, a port number greater than or equal to
 187  * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
 188  * exactly one local port. Ports less than 1024 when requested can only be bound
 189  * by system (or root) processes or by processes executed by privileged users.
 190  *
 191  * Return:
 192  * Upon successful completion, scif_bind() returns the port number to which epd
 193  * is bound; otherwise in user mode -1 is returned and errno is set to
 194  * indicate the error; in kernel mode the negative of one of the following
 195  * errors is returned.
 196  *
 197  * Errors:
 198  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 199  * EINVAL - the endpoint or the port is already bound
 200  * EISCONN - The endpoint is already connected
 201  * ENOSPC - No port number available for assignment
 202  * EACCES - The port requested is protected and the user is not the superuser
 203  */
 204 int scif_bind(scif_epd_t epd, u16 pn);
 205 
 206 /**
 207  * scif_listen() - Listen for connections on an endpoint
 208  * @epd:        endpoint descriptor
 209  * @backlog:    maximum pending connection requests
 210  *
 211  * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
 212  * an endpoint that will be used to accept incoming connection requests. Once
 213  * so marked, the endpoint is said to be in the listening state and may not be
 214  * used as the endpoint of a connection.
 215  *
 216  * The endpoint, epd, must have been bound to a port.
 217  *
 218  * The backlog argument defines the maximum length to which the queue of
 219  * pending connections for epd may grow. If a connection request arrives when
 220  * the queue is full, the client may receive an error with an indication that
 221  * the connection was refused.
 222  *
 223  * Return:
 224  * Upon successful completion, scif_listen() returns 0; otherwise in user mode
 225  * -1 is returned and errno is set to indicate the error; in kernel mode the
 226  * negative of one of the following errors is returned.
 227  *
 228  * Errors:
 229  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 230  * EINVAL - the endpoint is not bound to a port
 231  * EISCONN - The endpoint is already connected or listening
 232  */
 233 int scif_listen(scif_epd_t epd, int backlog);
 234 
 235 /**
 236  * scif_connect() - Initiate a connection on a port
 237  * @epd:        endpoint descriptor
 238  * @dst:        global id of port to which to connect
 239  *
 240  * The scif_connect() function requests the connection of endpoint epd to remote
 241  * port dst. If the connection is successful, a peer endpoint, bound to dst, is
 242  * created on node dst.node. On successful return, the connection is complete.
 243  *
 244  * If the endpoint epd has not already been bound to a port, scif_connect()
 245  * will bind it to an unused local port.
 246  *
 247  * A connection is terminated when an endpoint of the connection is closed,
 248  * either explicitly by scif_close(), or when a process that owns one of the
 249  * endpoints of the connection is terminated.
 250  *
 251  * In user space, scif_connect() supports an asynchronous connection mode
 252  * if the application has set the O_NONBLOCK flag on the endpoint via the
 253  * fcntl() system call. With this flag set, the calling process does not
 254  * wait during scif_connect().
 255  *
 256  * Return:
 257  * Upon successful completion, scif_connect() returns the port ID to which the
 258  * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
 259  * set to indicate the error; in kernel mode the negative of one of the
 260  * following errors is returned.
 261  *
 262  * Errors:
 263  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 264  * ECONNREFUSED - The destination was not listening for connections or refused
 265  * the connection request
 266  * EINVAL - dst.port is not a valid port ID
 267  * EISCONN - The endpoint is already connected
 268  * ENOMEM - No buffer space is available
 269  * ENODEV - The destination node does not exist, or the node is lost, or it
 270  * existed but is not currently in the network since it may have crashed
 271  * ENOSPC - No port number available for assignment
 272  * EOPNOTSUPP - The endpoint is listening and cannot be connected
 273  */
 274 int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
 275 
 276 /**
 277  * scif_accept() - Accept a connection on an endpoint
 278  * @epd:        endpoint descriptor
 279  * @peer:       global id of port to which connected
 280  * @newepd:     new connected endpoint descriptor
 281  * @flags:      flags
 282  *
 283  * The scif_accept() call extracts the first connection request from the queue
 284  * of pending connections for the port on which epd is listening. scif_accept()
 285  * creates a new endpoint, bound to the same port as epd, and allocates a new
 286  * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
 287  * endpoint is connected to the endpoint through which the connection was
 288  * requested. epd is unaffected by this call, and remains in the listening
 289  * state.
 290  *
 291  * On successful return, peer holds the global port identifier (node id and
 292  * local port number) of the port which requested the connection.
 293  *
 294  * A connection is terminated when an endpoint of the connection is closed,
 295  * either explicitly by scif_close(), or when a process that owns one of the
 296  * endpoints of the connection is terminated.
 297  *
 298  * The number of connections that can (subsequently) be accepted on epd is only
 299  * limited by system resources (memory).
 300  *
 301  * The flags argument is formed by OR'ing together zero or more of the
 302  * following values.
 303  * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
 304  *                      SCIF_ACCEPT_SYNC is not in flags, and no pending
 305  *                      connections are present on the queue, scif_accept()
 306  *                      fails with an EAGAIN error
 307  *
 308  * In user mode, the select() and poll() functions can be used to determine
 309  * when there is a connection request. In kernel mode, the scif_poll()
 310  * function may be used for this purpose. A readable event will be delivered
 311  * when a connection is requested.
 312  *
 313  * Return:
 314  * Upon successful completion, scif_accept() returns 0; otherwise in user mode
 315  * -1 is returned and errno is set to indicate the error; in kernel mode the
 316  *      negative of one of the following errors is returned.
 317  *
 318  * Errors:
 319  * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
 320  * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
 321  * its connection request
 322  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 323  * EINTR - Interrupted function
 324  * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
 325  * NULL, or newepd is NULL
 326  * ENODEV - The requesting node is lost, or existed but is not currently in the
 327  * network since it may have crashed
 328  * ENOMEM - Not enough space
 329  * ENOENT - Secondary part of epd registration failed
 330  */
 331 int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
 332                 *newepd, int flags);
 333 
 334 /**
 335  * scif_close() - Close an endpoint
 336  * @epd:        endpoint descriptor
 337  *
 338  * scif_close() closes an endpoint and performs necessary teardown of
 339  * facilities associated with that endpoint.
 340  *
 341  * If epd is a listening endpoint then it will no longer accept connection
 342  * requests on the port to which it is bound. Any pending connection requests
 343  * are rejected.
 344  *
 345  * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
 346  * which are in-process through epd or its peer endpoint will complete before
 347  * scif_close() returns. Registered windows of the local and peer endpoints are
 348  * released as if scif_unregister() was called against each window.
 349  *
 350  * Closing a SCIF endpoint does not affect local registered memory mapped by
 351  * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
 352  * SCIF endpoint until it is explicitly unmapped by the peer calling munmap(..).
 353  *
 354  * If the peer endpoint's receive queue is not empty at the time that epd is
 355  * closed, then the peer endpoint can be passed as the endpoint parameter to
 356  * scif_recv() until the receive queue is empty.
 357  *
 358  * epd is freed and may no longer be accessed.
 359  *
 360  * Return:
 361  * Upon successful completion, scif_close() returns 0; otherwise in user mode
 362  * -1 is returned and errno is set to indicate the error; in kernel mode the
 363  * negative of one of the following errors is returned.
 364  *
 365  * Errors:
 366  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 367  */
 368 int scif_close(scif_epd_t epd);
 369 
 370 /**
 371  * scif_send() - Send a message
 372  * @epd:        endpoint descriptor
 373  * @msg:        message buffer address
 374  * @len:        message length
 375  * @flags:      blocking mode flags
 376  *
 377  * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
 378  * are copied from memory starting at address msg. On successful execution the
 379  * return value of scif_send() is the number of bytes that were sent, and is
 380  * zero if no bytes were sent because len was zero. scif_send() may be called
 381  * only when the endpoint is in a connected state.
 382  *
 383  * If a scif_send() call is non-blocking, then it sends only those bytes which
 384  * can be sent without waiting, up to a maximum of len bytes.
 385  *
 386  * If a scif_send() call is blocking, then it normally returns after sending
 387  * all len bytes. If a blocking call is interrupted or the connection is
 388  * reset, the call is considered successful if some bytes were sent or len is
 389  * zero, otherwise the call is considered unsuccessful.
 390  *
 391  * In user mode, the select() and poll() functions can be used to determine
 392  * when the send queue is not full. In kernel mode, the scif_poll() function
 393  * may be used for this purpose.
 394  *
 395  * It is recommended that scif_send()/scif_recv() only be used for short
 396  * control-type message communication between SCIF endpoints. The SCIF RMA
 397  * APIs are expected to provide better performance for transfer sizes of
 398  * 1024 bytes or longer for the current MIC hardware and software
 399  * implementation.
 400  *
 401  * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
 402  * is passed as the flags argument.
 403  *
 404  * Return:
 405  * Upon successful completion, scif_send() returns the number of bytes sent;
 406  * otherwise in user mode -1 is returned and errno is set to indicate the
 407  * error; in kernel mode the negative of one of the following errors is
 408  * returned.
 409  *
 410  * Errors:
 411  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 412  * ECONNRESET - Connection reset by peer
 413  * EINVAL - flags is invalid, or len is negative
 414  * ENODEV - The remote node is lost, or existed but is not currently in the
 415  * network since it may have crashed
 416  * ENOMEM - Not enough space
 417  * ENOTCONN - The endpoint is not connected
 418  */
 419 int scif_send(scif_epd_t epd, void *msg, int len, int flags);
 420 
 421 /**
 422  * scif_recv() - Receive a message
 423  * @epd:        endpoint descriptor
 424  * @msg:        message buffer address
 425  * @len:        message buffer length
 426  * @flags:      blocking mode flags
 427  *
 428  * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
 429  * data are copied to memory starting at address msg. On successful execution
 430  * the return value of scif_recv() is the number of bytes that were received,
 431  * and is zero if no bytes were received because len was zero. scif_recv() may
 432  * be called only when the endpoint is in a connected state.
 433  *
 434  * If a scif_recv() call is non-blocking, then it receives only those bytes
 435  * which can be received without waiting, up to a maximum of len bytes.
 436  *
 437  * If a scif_recv() call is blocking, then it normally returns after receiving
 438  * all len bytes. If the blocking call was interrupted due to a disconnection,
 439  * subsequent calls to scif_recv() will copy all bytes received up to the point
 440  * of disconnection.
 441  *
 442  * In user mode, the select() and poll() functions can be used to determine
 443  * when data is available to be received. In kernel mode, the scif_poll()
 444  * function may be used for this purpose.
 445  *
 446  * It is recommended that scif_send()/scif_recv() only be used for short
 447  * control-type message communication between SCIF endpoints. The SCIF RMA
 448  * APIs are expected to provide better performance for transfer sizes of
 449  * 1024 bytes or longer for the current MIC hardware and software
 450  * implementation.
 451  *
 452  * scif_recv() will block until the entire message is received if
 453  * SCIF_RECV_BLOCK is passed as the flags argument.
 454  *
 455  * Return:
 456  * Upon successful completion, scif_recv() returns the number of bytes
 457  * received; otherwise in user mode -1 is returned and errno is set to
 458  * indicate the error; in kernel mode the negative of one of the following
 459  * errors is returned.
 460  *
 461  * Errors:
 462  * EAGAIN - The destination node is returning from a low power state
 463  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 464  * ECONNRESET - Connection reset by peer
 465  * EINVAL - flags is invalid, or len is negative
 466  * ENODEV - The remote node is lost, or existed but is not currently in the
 467  * network since it may have crashed
 468  * ENOMEM - Not enough space
 469  * ENOTCONN - The endpoint is not connected
 470  */
 471 int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
 472 
 473 /**
 474  * scif_register() - Mark a memory region for remote access.
 475  * @epd:                endpoint descriptor
 476  * @addr:               starting virtual address
 477  * @len:                length of range
 478  * @offset:             offset of window
 479  * @prot_flags:         read/write protection flags
 480  * @map_flags:          mapping flags
 481  *
 482  * The scif_register() function opens a window, a range of whole pages of the
 483  * registered address space of the endpoint epd, starting at offset po and
 484  * continuing for len bytes. The value of po, further described below, is a
 485  * function of the parameters offset and len, and the value of map_flags. Each
 486  * page of the window represents the physical memory page which backs the
 487  * corresponding page of the range of virtual address pages starting at addr
 488  * and continuing for len bytes. addr and len are constrained to be multiples
 489  * of the page size. A successful scif_register() call returns po.
 490  *
 491  * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
 492  * exactly, and offset is constrained to be a multiple of the page size. The
 493  * mapping established by scif_register() will not replace any existing
 494  * registration; an error is returned if any page within the range [offset,
 495  * offset + len - 1] intersects an existing window.
 496  *
 497  * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
 498  * implementation-defined manner to arrive at po. The po value so chosen will
 499  * be an area of the registered address space that the implementation deems
 500  * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
 501  * granting the implementation complete freedom in selecting po, subject to
 502  * constraints described below. A non-zero value of offset is taken to be a
 503  * suggestion of an offset near which the mapping should be placed. When the
 504  * implementation selects a value for po, it does not replace any extant
 505  * window. In all cases, po will be a multiple of the page size.
 506  *
 507  * The physical pages which are so represented by a window are available for
 508  * access in calls to mmap(), scif_readfrom(), scif_writeto(),
 509  * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
 510  * physical pages represented by the window will not be reused by the memory
 511  * subsystem for any other purpose. Note that the same physical page may be
 512  * represented by multiple windows.
 513  *
 514  * Subsequent operations which change the memory pages to which virtual
 515  * addresses are mapped (such as mmap(), munmap()) have no effect on
 516  * existing windows.
 517  *
 518  * If the process will fork(), it is recommended that the registered
 519  * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
 520  * problems due to copy-on-write semantics.
 521  *
 522  * The prot_flags argument is formed by OR'ing together one or more of the
 523  * following values.
 524  * SCIF_PROT_READ - allow read operations from the window
 525  * SCIF_PROT_WRITE - allow write operations to the window
 526  *
 527  * Return:
 528  * Upon successful completion, scif_register() returns the offset at which the
 529  * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
 530  * is ((off_t)-1)) is returned and errno is set to indicate the error; in
 531  * kernel mode the negative of one of the following errors is returned.
 532  *
 533  * Errors:
 534  * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
 535  * [offset, offset + len -1] are already registered
 536  * EAGAIN - The mapping could not be performed due to lack of resources
 537  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 538  * ECONNRESET - Connection reset by peer
 539  * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
 540  * set in map_flags and offset is not a multiple of the page size, or addr is
 541  * not a multiple of the page size, or len is not a multiple of the page size
 542  * or is 0, or offset is negative
 543  * ENODEV - The remote node is lost, or existed but is not currently in the
 544  * network since it may have crashed
 545  * ENOMEM - Not enough space
 546  * ENOTCONN - The endpoint is not connected
 547  */
 548 off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
 549                     int prot_flags, int map_flags);
 550 
 551 /**
 552  * scif_unregister() - Unregister memory previously marked for remote access.
 553  * @epd:        endpoint descriptor
 554  * @offset:     start of range to unregister
 555  * @len:        length of range to unregister
 556  *
 557  * The scif_unregister() function closes those previously registered windows
 558  * which are entirely within the range [offset, offset + len - 1]. It is an
 559  * error to specify a range which intersects only a subrange of a window.
 560  *
 561  * On a successful return, pages within the window may no longer be specified
 562  * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
 563  * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window,
 564  * however, continues to exist until all previous references against it are
 565  * removed. A window is referenced if there is a mapping to it created by
 566  * mmap(), or if scif_get_pages() was called against the window
 567  * (and the pages have not been returned via scif_put_pages()). A window is
 568  * also referenced while an RMA, in which some range of the window is a source
 569  * or destination, is in progress. Finally a window is referenced while some
 570  * offset in that window was specified to scif_fence_signal(), and the RMAs
 571  * marked by that call to scif_fence_signal() have not completed. While a
 572  * window is in this state, its registered address space pages are not
 573  * available for use in a new registered window.
 574  *
 575  * When all such references to the window have been removed, its references to
 576  * all the physical pages which it represents are removed. Similarly, the
 577  * registered address space pages of the window become available for
 578  * registration in a new window.
 579  *
 580  * Return:
 581  * Upon successful completion, scif_unregister() returns 0; otherwise in user
 582  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 583  * the negative of one of the following errors is returned. In the event of an
 584  * error, no windows are unregistered.
 585  *
 586  * Errors:
 587  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 588  * ECONNRESET - Connection reset by peer
 589  * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
 590  * window, or offset is negative
 591  * ENODEV - The remote node is lost, or existed but is not currently in the
 592  * network since it may have crashed
 593  * ENOTCONN - The endpoint is not connected
 594  * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
 595  * registered address space of epd
 596  */
 597 int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
 598 
 599 /**
 600  * scif_readfrom() - Copy from a remote address space
 601  * @epd:        endpoint descriptor
 602  * @loffset:    offset in local registered address space to
 603  *              which to copy
 604  * @len:        length of range to copy
 605  * @roffset:    offset in remote registered address space
 606  *              from which to copy
 607  * @rma_flags:  transfer mode flags
 608  *
 609  * scif_readfrom() copies len bytes from the remote registered address space of
 610  * the peer of endpoint epd, starting at the offset roffset to the local
 611  * registered address space of epd, starting at the offset loffset.
 612  *
 613  * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
 614  * roffset + len - 1] must be within some registered window or windows of the
 615  * local and remote nodes. A range may intersect multiple registered windows,
 616  * but only if those windows are contiguous in the registered address space.
 617  *
 618  * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 619  * programmed read/writes. Otherwise the data is copied using DMA. If
 620  * rma_flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after
 621  * the transfer is complete. Otherwise, the transfer may be performed
 622  * asynchronously. The order in which any two asynchronous RMA operations complete
 623  * is non-deterministic. The synchronization functions, scif_fence_mark()/
 624  * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 625  * the completion of asynchronous RMA operations on the same endpoint.
 626  *
 627  * The DMA transfer of individual bytes is not guaranteed to complete in
 628  * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 629  * cacheline or partial cacheline of the source range will become visible on
 630  * the destination node after all other transferred data in the source
 631  * range has become visible on the destination node.
 632  *
 633  * The optimal DMA performance will likely be realized if both
 634  * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
 635  * performance will likely be realized if loffset and roffset are not
 636  * cacheline aligned but are separated by some multiple of 64. The lowest level
 637  * of performance is likely if loffset and roffset are not separated by a
 638  * multiple of 64.
 639  *
 640  * The rma_flags argument is formed by OR'ing together zero or more of the
 641  * following values.
 642  * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 643  *      engine.
 644  * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 645  *              transfer has completed. Passing this flag results in the
 646  *              current implementation busy waiting and consuming CPU cycles
 647  *              while the DMA transfer is in progress for best performance by
 648  *              avoiding the interrupt latency.
 649  * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 650  *              the source range becomes visible on the destination node
 651  *              after all other transferred data in the source range has
 652  *              become visible on the destination
 653  *
 654  * Return:
 655  * Upon successful completion, scif_readfrom() returns 0; otherwise in user
 656  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 657  * the negative of one of the following errors is returned.
 658  *
 659  * Errors:
 660  * EACCES - Attempt to write to a read-only range
 661  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 662  * ECONNRESET - Connection reset by peer
 663  * EINVAL - rma_flags is invalid
 664  * ENODEV - The remote node is lost or existed, but is not currently in the
 665  * network since it may have crashed
 666  * ENOTCONN - The endpoint is not connected
 667  * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
 668  * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
 669  * for the registered address space of the peer of epd, or loffset or roffset
 670  * is negative
 671  */
 672 int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
 673                   roffset, int rma_flags);
 674 
 675 /**
 676  * scif_writeto() - Copy to a remote address space
 677  * @epd:        endpoint descriptor
 678  * @loffset:    offset in local registered address space
 679  *              from which to copy
 680  * @len:        length of range to copy
 681  * @roffset:    offset in remote registered address space to
 682  *              which to copy
 683  * @rma_flags:  transfer mode flags
 684  *
 685  * scif_writeto() copies len bytes from the local registered address space of
 686  * epd, starting at the offset loffset to the remote registered address space
 687  * of the peer of endpoint epd, starting at the offset roffset.
 688  *
 689  * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
 690  * roffset + len - 1] must be within some registered window or windows of the
 691  * local and remote nodes. A range may intersect multiple registered windows,
 692  * but only if those windows are contiguous in the registered address space.
 693  *
 694  * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 695  * programmed read/writes. Otherwise the data is copied using DMA. If
 696  * rma_flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
 697  * transfer is complete. Otherwise, the transfer may be performed asynchron-
 698  * ously. The order in which any two asynchronous RMA operations complete
 699  * is non-deterministic. The synchronization functions, scif_fence_mark()/
 700  * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 701  * the completion of asynchronous RMA operations on the same endpoint.
 702  *
 703  * The DMA transfer of individual bytes is not guaranteed to complete in
 704  * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 705  * cacheline or partial cacheline of the source range will become visible on
 706  * the destination node after all other transferred data in the source
 707  * range has become visible on the destination node.
 708  *
 709  * The optimal DMA performance will likely be realized if both
 710  * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
 711  * performance will likely be realized if loffset and roffset are not cacheline
 712  * aligned but are separated by some multiple of 64. The lowest level of
 713  * performance is likely if loffset and roffset are not separated by a multiple
 714  * of 64.
 715  *
 716  * The rma_flags argument is formed by ORing together zero or more of the
 717  * following values.
 718  * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 719  *                      engine.
 720  * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 721  *              transfer has completed. Passing this flag results in the
 722  *              current implementation busy waiting and consuming CPU cycles
 723  *              while the DMA transfer is in progress for best performance by
 724  *              avoiding the interrupt latency.
 725  * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 726  *              the source range becomes visible on the destination node
 727  *              after all other transferred data in the source range has
 728  *              become visible on the destination
 729  *
 730  * Return:
 731  * Upon successful completion, scif_writeto() returns 0; otherwise in user
 732  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 733  * the negative of one of the following errors is returned.
 734  *
 735  * Errors:
 736  * EACCES - Attempt to write to a read-only range
 737  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 738  * ECONNRESET - Connection reset by peer
 739  * EINVAL - rma_flags is invalid
 740  * ENODEV - The remote node is lost, or existed but is not currently in the
 741  * network since it may have crashed
 742  * ENOTCONN - The endpoint is not connected
 743  * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
 744  * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
 745  * for the registered address space of the peer of epd, or loffset or roffset
 746  * is negative
 747  */
 748 int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
 749                  roffset, int rma_flags);
 750 
 751 /**
 752  * scif_vreadfrom() - Copy from a remote address space
 753  * @epd:        endpoint descriptor
 754  * @addr:       address to which to copy
 755  * @len:        length of range to copy
 756  * @roffset:    offset in remote registered address space
 757  *              from which to copy
 758  * @rma_flags:  transfer mode flags
 759  *
 760  * scif_vreadfrom() copies len bytes from the remote registered address
 761  * space of the peer of endpoint epd, starting at the offset roffset, to local
 762  * memory, starting at addr.
 763  *
 764  * The specified range [roffset, roffset + len - 1] must be within some
 765  * registered window or windows of the remote node. The range may
 766  * intersect multiple registered windows, but only if those windows are
 767  * contiguous in the registered address space.
 768  *
 769  * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 770  * programmed read/writes. Otherwise the data is copied using DMA. If
 771  * rma_flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
 772  * transfer is complete. Otherwise, the transfer may be performed asynchron-
 773  * ously. The order in which any two asynchronous RMA operations complete
 774  * is non-deterministic. The synchronization functions, scif_fence_mark()/
 775  * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 776  * the completion of asynchronous RMA operations on the same endpoint.
 777  *
 778  * The DMA transfer of individual bytes is not guaranteed to complete in
 779  * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 780  * cacheline or partial cacheline of the source range will become visible on
 781  * the destination node after all other transferred data in the source
 782  * range has become visible on the destination node.
 783  *
 784  * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
 785  * the specified local memory range may remain in a pinned state even after
 786  * the specified transfer completes. This may reduce overhead if some or all of
 787  * the same virtual address range is referenced in a subsequent call of
 788  * scif_vreadfrom() or scif_vwriteto().
 789  *
 790  * The optimal DMA performance will likely be realized if both
 791  * addr and roffset are cacheline aligned (are a multiple of 64). Lower
 792  * performance will likely be realized if addr and roffset are not
 793  * cacheline aligned but are separated by some multiple of 64. The lowest level
 794  * of performance is likely if addr and roffset are not separated by a
 795  * multiple of 64.
 796  *
 797  * The rma_flags argument is formed by ORing together zero or more of the
 798  * following values.
 799  * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 800  *      engine.
 801  * SCIF_RMA_USECACHE - enable registration caching
 802  * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 803  *              transfer has completed. Passing this flag results in the
 804  *              current implementation busy waiting and consuming CPU cycles
 805  *              while the DMA transfer is in progress for best performance by
 806  *              avoiding the interrupt latency.
 807  * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 808  *      the source range becomes visible on the destination node
 809  *      after all other transferred data in the source range has
 810  *      become visible on the destination
 811  *
 812  * Return:
 813  * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
 814  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 815  * the negative of one of the following errors is returned.
 816  *
 817  * Errors:
 818  * EACCES - Attempt to write to a read-only range
 819  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 820  * ECONNRESET - Connection reset by peer
 821  * EINVAL - rma_flags is invalid
 822  * ENODEV - The remote node is lost, or existed but is not currently in the
 823  * network since it may have crashed
 824  * ENOTCONN - The endpoint is not connected
 825  * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
 826  * registered address space of the peer of epd
 827  */
 828 int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
 829                    int rma_flags);
 830 
 831 /**
 832  * scif_vwriteto() - Copy to a remote address space
 833  * @epd:        endpoint descriptor
 834  * @addr:       address from which to copy
 835  * @len:        length of range to copy
 836  * @roffset:    offset in remote registered address space to
 837  *              which to copy
 838  * @rma_flags:  transfer mode flags
 839  *
 840  * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
 841  * the remote registered address space of the peer of endpoint epd, starting at
 842  * the offset roffset.
 843  *
 844  * The specified range [roffset, roffset + len - 1] must be within some
 845  * registered window or windows of the remote node. The range may intersect
 846  * multiple registered windows, but only if those windows are contiguous in the
 847  * registered address space.
 848  *
 849  * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 850  * programmed read/writes. Otherwise the data is copied using DMA. If
 851  * rma_flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
 852  * transfer is complete. Otherwise, the transfer may be performed asynchron-
 853  * ously. The order in which any two asynchronous RMA operations complete
 854  * is non-deterministic. The synchronization functions, scif_fence_mark()/
 855  * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 856  * the completion of asynchronous RMA operations on the same endpoint.
 857  *
 858  * The DMA transfer of individual bytes is not guaranteed to complete in
 859  * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 860  * cacheline or partial cacheline of the source range will become visible on
 861  * the destination node after all other transferred data in the source
 862  * range has become visible on the destination node.
 863  *
 864  * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
 865  * the specified local memory range may remain in a pinned state even after
 866  * the specified transfer completes. This may reduce overhead if some or all of
 867  * the same virtual address range is referenced in a subsequent call of
 868  * scif_vreadfrom() or scif_vwriteto().
 869  *
 870  * The optimal DMA performance will likely be realized if both
 871  * addr and roffset are cacheline aligned (are a multiple of 64). Lower
 872  * performance will likely be realized if addr and roffset are not cacheline
 873  * aligned but are separated by some multiple of 64. The lowest level of
 874  * performance is likely if addr and roffset are not separated by a multiple of
 875  * 64.
 876  *
 877  * The rma_flags argument is formed by ORing together zero or more of the
 878  * following values.
 879  * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 880  *      engine.
 881  * SCIF_RMA_USECACHE - allow registration caching
 882  * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 883  *              transfer has completed. Passing this flag results in the
 884  *              current implementation busy waiting and consuming CPU cycles
 885  *              while the DMA transfer is in progress for best performance by
 886  *              avoiding the interrupt latency.
 887  * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 888  *              the source range becomes visible on the destination node
 889  *              after all other transferred data in the source range has
 890  *              become visible on the destination
 891  *
 892  * Return:
 893  * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
 894  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 895  * the negative of one of the following errors is returned.
 896  *
 897  * Errors:
 898  * EACCES - Attempt to write to a read-only range
 899  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 900  * ECONNRESET - Connection reset by peer
 901  * EINVAL - rma_flags is invalid
 902  * ENODEV - The remote node is lost, or existed but is not currently in the
 903  * network since it may have crashed
 904  * ENOTCONN - The endpoint is not connected
 905  * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
 906  * registered address space of the peer of epd
 907  */
 908 int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
 909                   int rma_flags);
 910 
 911 /**
 912  * scif_fence_mark() - Mark previously issued RMAs
 913  * @epd:        endpoint descriptor
 914  * @flags:      control flags
 915  * @mark:       mark value returned as output
 916  *
 917  * scif_fence_mark() returns after marking the current set of all uncompleted
 918  * RMAs initiated through the endpoint epd or the current set of all
 919  * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
 920  * marked with a value returned at mark. The application may subsequently call
 921  * scif_fence_wait(), passing the value returned at mark, to await completion
 922  * of all RMAs so marked.
 923  *
 924  * The flags argument has exactly one of the following values.
 925  * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
 926  *      epd are marked
 927  * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
 928  *      of endpoint epd are marked
 929  *
 930  * Return:
 931  * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
 932  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 933  * the negative of one of the following errors is returned.
 934  *
 935  * Errors:
 936  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 937  * ECONNRESET - Connection reset by peer
 938  * EINVAL - flags is invalid
 939  * ENODEV - The remote node is lost, or existed but is not currently in the
 940  * network since it may have crashed
 941  * ENOTCONN - The endpoint is not connected
 942  * ENOMEM - Insufficient kernel memory was available
 943  */
 944 int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
 945 
 946 /**
 947  * scif_fence_wait() - Wait for completion of marked RMAs
 948  * @epd:        endpoint descriptor
 949  * @mark:       mark value obtained from scif_fence_mark()
 950  *
 951  * scif_fence_wait() returns after all RMAs marked with mark have completed.
 952  * The value passed in mark must have been obtained in a previous call to
 953  * scif_fence_mark().
 954  *
 955  * Return:
 956  * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
 957  * mode -1 is returned and errno is set to indicate the error; in kernel mode
 958  * the negative of one of the following errors is returned.
 959  *
 960  * Errors:
 961  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 962  * ECONNRESET - Connection reset by peer
 963  * ENODEV - The remote node is lost, or existed but is not currently in the
 964  * network since it may have crashed
 965  * ENOTCONN - The endpoint is not connected
 966  * ENOMEM - Insufficient kernel memory was available
 967  */
 968 int scif_fence_wait(scif_epd_t epd, int mark);
 969 
 970 /**
 971  * scif_fence_signal() - Request a memory update on completion of RMAs
 972  * @epd:        endpoint descriptor
 973  * @loff:       local offset
 974  * @lval:       local value to write to loff
 975  * @roff:       remote offset
 976  * @rval:       remote value to write to roff
 977  * @flags:      flags
 978  *
 979  * scif_fence_signal() returns after marking the current set of all uncompleted
 980  * RMAs initiated through the endpoint epd or marking the current set of all
 981  * uncompleted RMAs initiated through the peer of endpoint epd.
 982  *
 983  * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
 984  * marked set, lval is written to memory at the address corresponding to offset
 985  * loff in the local registered address space of epd. loff must be within a
 986  * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
 987  * of the RMAs in the marked set, rval is written to memory at the address
 988  * corresponding to offset roff in the remote registered address space of epd.
 989  * roff must be within a remote registered window of the peer of epd. Note
 990  * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
 991  *
 992  * The flags argument is formed by OR'ing together the following.
 993  * Exactly one of the following values.
 994  * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
 995  *      epd are marked
 996  * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
 997  *      of endpoint epd are marked
 998  * One or more of the following values.
 999  * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
1000  *      memory at the address corresponding to offset loff in the local
1001  *      registered address space of epd.
1002  * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
1003  *      memory at the address corresponding to offset roff in the remote
1004  *      registered address space of epd.
1005  *
1006  * Return:
1007  * Upon successful completion, scif_fence_signal() returns 0; otherwise in
1008  * user mode -1 is returned and errno is set to indicate the error; in kernel
1009  * mode the negative of one of the following errors is returned.
1010  *
1011  * Errors:
1012  * EBADF, ENOTTY - epd is not a valid endpoint descriptor
1013  * ECONNRESET - Connection reset by peer
1014  * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
1015  * ENODEV - The remote node is lost, or existed but is not currently in the
1016  * network since it may have crashed
1017  * ENOTCONN - The endpoint is not connected
1018  * ENXIO - loff is invalid for the registered address space of epd, or roff is
1019  * invalid for the registered address space of the peer of epd
1020  */
1021 int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
1022                       u64 rval, int flags);
1023 
1024 /**
1025  * scif_get_node_ids() - Return information about online nodes
1026  * @nodes:      array in which to return online node IDs
1027  * @len:        number of entries in the nodes array
1028  * @self:       address to place the node ID of the local node
1029  *
1030  * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
1031  * nodes in the SCIF network. If there is not enough space in nodes, as
1032  * indicated by the len parameter, only len node IDs are returned in nodes. The
1033  * return value of scif_get_node_ids() is the total number of nodes currently in
1034  * the SCIF network. By checking the return value against the len parameter,
1035  * the user may determine whether nodes was large enough (return value <= len).
1036  *
1037  * The node ID of the local node is returned at self.
1038  *
1039  * Return:
1040  * Upon successful completion, scif_get_node_ids() returns the actual number of
1041  * online nodes in the SCIF network including 'self'; otherwise in user mode
1042  * -1 is returned and errno is set to indicate the error; in kernel mode no
1043  * errors are returned.
1044  */
1045 int scif_get_node_ids(u16 *nodes, int len, u16 *self);
1046 
1047 /**
1048  * scif_pin_pages() - Pin a set of pages
1049  * @addr:               Virtual address of range to pin
1050  * @len:                Length of range to pin
1051  * @prot_flags:         Page protection flags
1052  * @map_flags:          Page classification flags
1053  * @pinned_pages:       Returned handle to pinned pages
1054  *
1055  * scif_pin_pages() pins (locks in physical memory) the physical pages which
1056  * back the range of virtual address pages starting at addr and continuing for
1057  * len bytes. addr and len are constrained to be multiples of the page size. A
1058  * successful scif_pin_pages() call returns a handle in pinned_pages which may
1059  * be used in subsequent calls to scif_register_pinned_pages().
1060  *
1061  * The pages will remain pinned as long as there is a reference against the
1062  * scif_pinned_pages_t value returned by scif_pin_pages() and until
1063  * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
1064  * reference is added to a scif_pinned_pages_t value each time a window is
1065  * created by calling scif_register_pinned_pages() and passing the
1066  * scif_pinned_pages_t value. A reference is removed from a
1067  * scif_pinned_pages_t value each time such a window is deleted.
1068  *
1069  * Subsequent operations which change the memory pages to which virtual
1070  * addresses are mapped (such as mmap(), munmap()) have no effect on the
1071  * scif_pinned_pages_t value or windows created against it.
1072  *
1073  * If the process will fork(), it is recommended that the registered
1074  * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
1075  * problems due to copy-on-write semantics.
1076  *
1077  * The prot_flags argument is formed by OR'ing together one or more of the
1078  * following values.
1079  * SCIF_PROT_READ - allow read operations against the pages
1080  * SCIF_PROT_WRITE - allow write operations against the pages
1081  * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a
1082  * kernel space address. By default, addr is interpreted as a user space
1083  * address.
1084  *
1085  * Return:
1086  * Upon successful completion, scif_pin_pages() returns 0; otherwise the
1087  * negative of one of the following errors is returned.
1088  *
1089  * Errors:
1090  * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative
1091  * ENOMEM - Not enough space
1092  */
1093 int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags,
1094                    scif_pinned_pages_t *pinned_pages);
1095 
1096 /**
1097  * scif_unpin_pages() - Unpin a set of pages
1098  * @pinned_pages:       Handle to pinned pages to be unpinned
1099  *
1100  * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
1101  * windows against pinned_pages. The physical pages represented by pinned_pages
1102  * will remain pinned until all windows previously registered against
1103  * pinned_pages are deleted (the window is scif_unregister()'d and all
1104  * references to the window are removed (see scif_unregister())).
1105  *
1106  * pinned_pages must have been obtained from a previous call to
1107  * scif_pin_pages(). After calling scif_unpin_pages(), it is an error to pass
1108  * pinned_pages to scif_register_pinned_pages().
1109  *
1110  * Return:
1111  * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
1112  * negative of one of the following errors is returned.
1113  *
1114  * Errors:
1115  * EINVAL - pinned_pages is not valid
1116  */
1117 int scif_unpin_pages(scif_pinned_pages_t pinned_pages);
1118 
1119 /**
1120  * scif_register_pinned_pages() - Mark a memory region for remote access.
1121  * @epd:                endpoint descriptor
1122  * @pinned_pages:       Handle to pinned pages
1123  * @offset:             Registered address space offset
1124  * @map_flags:          Flags which control where pages are mapped
1125  *
1126  * The scif_register_pinned_pages() function opens a window, a range of whole
1127  * pages of the registered address space of the endpoint epd, starting at
1128  * offset po. The value of po, further described below, is a function of the
1129  * parameters offset and pinned_pages, and the value of map_flags. Each page of
1130  * the window represents a corresponding physical memory page of the range
1131  * represented by pinned_pages; the length of the window is the same as the
1132  * length of range represented by pinned_pages. A successful
1133  * scif_register_pinned_pages() call returns po as the return value.
1134  *
1135  * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
1136  * exactly, and offset is constrained to be a multiple of the page size. The
1137  * mapping established by scif_register_pinned_pages() will not replace any
1138  * existing registration; an error is returned if any page of the new window
1139  * would intersect an existing window.
1140  *
1141  * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
1142  * implementation-defined manner to arrive at po. The po so chosen will be an
1143  * area of the registered address space that the implementation deems suitable
1144  * for a mapping of the required size. An offset value of 0 is interpreted as
1145  * granting the implementation complete freedom in selecting po, subject to
1146  * constraints described below. A non-zero value of offset is taken to be a
1147  * suggestion of an offset near which the mapping should be placed. When the
1148  * implementation selects a value for po, it does not replace any extant
1149  * window. In all cases, po will be a multiple of the page size.
1150  *
1151  * The physical pages which are so represented by a window are available for
1152  * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
1153  * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
1154  * physical pages represented by the window will not be reused by the memory
1155  * subsystem for any other purpose. Note that the same physical page may be
1156  * represented by multiple windows.
1157  *
1158  * Windows created by scif_register_pinned_pages() are unregistered by
1159  * scif_unregister().
1160  *
1161  * The map_flags argument can be set to SCIF_MAP_FIXED, in which case offset is
1162  * interpreted as a fixed offset.
1163  *
1164  * Return:
1165  * Upon successful completion, scif_register_pinned_pages() returns the offset
1166  * at which the mapping was placed (po); otherwise the negative of one of the
1167  * following errors is returned.
1168  *
1169  * Errors:
1170  * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window
1171  * would intersect an existing window
1172  * EAGAIN - The mapping could not be performed due to lack of resources
1173  * ECONNRESET - Connection reset by peer
1174  * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and
1175  * offset is not a multiple of the page size, or offset is negative
1176  * ENODEV - The remote node is lost, or existed but is not currently in the
1177  * network since it may have crashed
1178  * ENOMEM - Not enough space
1179  * ENOTCONN - The endpoint is not connected
1180  */
1181 off_t scif_register_pinned_pages(scif_epd_t epd,
1182                                  scif_pinned_pages_t pinned_pages,
1183                                  off_t offset, int map_flags);
1184 
1185 /**
1186  * scif_get_pages() - Add references to remote registered pages
1187  * @epd:        endpoint descriptor
1188  * @offset:     remote registered offset
1189  * @len:        length of range of pages
1190  * @pages:      returned scif_range structure
1191  *
1192  * scif_get_pages() returns the addresses of the physical pages represented by
1193  * those pages of the registered address space of the peer of epd, starting at
1194  * offset and continuing for len bytes. offset and len are constrained to be
1195  * multiples of the page size.
1196  *
1197  * All of the pages in the specified range [offset, offset + len - 1] must be
1198  * within a single window of the registered address space of the peer of epd.
1199  *
1200  * The addresses are returned as a virtually contiguous array pointed to by the
1201  * phys_addr component of the scif_range structure whose address is returned in
1202  * pages. The nr_pages component of scif_range is the length of the array. The
1203  * prot_flags component of scif_range holds the protection flag value passed
1204  * when the pages were registered.
1205  *
1206  * Each physical page whose address is returned by scif_get_pages() remains
1207  * available and will not be released for reuse until the scif_range structure
1208  * is returned in a call to scif_put_pages(). The scif_range structure returned
1209  * by scif_get_pages() must not be modified.
1210  *
1211  * It is an error to call scif_close() on an endpoint on which a scif_range
1212  * structure of that endpoint has not been returned to scif_put_pages().
1213  *
1214  * Return:
1215  * Upon successful completion, scif_get_pages() returns 0; otherwise the
1216  * negative of one of the following errors is returned.
1217  * Errors:
1218  * ECONNRESET - Connection reset by peer
1219  * EINVAL - offset is not a multiple of the page size, or offset is negative, or
1220  * len is not a multiple of the page size
1221  * ENODEV - The remote node is lost, or existed but is not currently in the
1222  * network since it may have crashed
1223  * ENOTCONN - The endpoint is not connected
1224  * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid
1225  * for the registered address space of the peer of epd
1226  */
1227 int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
1228                    struct scif_range **pages);
1229 
1230 /**
1231  * scif_put_pages() - Remove references from remote registered pages
1232  * @pages:      pages to be returned
1233  *
1234  * scif_put_pages() releases a scif_range structure previously obtained by
1235  * calling scif_get_pages(). The physical pages represented by pages may
1236  * be reused when the window which represented those pages is unregistered.
1237  * Therefore, those pages must not be accessed after calling scif_put_pages().
1238  *
1239  * Return:
1240  * Upon successful completion, scif_put_pages() returns 0; otherwise the
1241  * negative of one of the following errors is returned.
1242  * Errors:
1243  * EINVAL - pages does not point to a valid scif_range structure, or
1244  * the scif_range structure pointed to by pages was already returned
1245  * ENODEV - The remote node is lost, or existed but is not currently in the
1246  * network since it may have crashed
1247  * ENOTCONN - The endpoint is not connected
1248  */
1249 int scif_put_pages(struct scif_range *pages);
1250 
1251 /**
1252  * scif_poll() - Wait for some event on an endpoint
1253  * @epds:       Array of scif_pollepd structures, one per endpoint of interest
1254  * @nepds:      Number of items in the epds array
1255  * @timeout:    Upper limit, in milliseconds, on the time scif_poll() may block
1256  *
1257  * scif_poll() waits for one of a set of endpoints to become ready to perform
1258  * an I/O operation.
1259  *
1260  * The epds argument specifies the endpoint descriptors to be examined and the
1261  * events of interest for each endpoint descriptor. epds is a pointer to an
1262  * array with one member for each open endpoint descriptor of interest.
1263  *
1264  * The number of items in the epds array is specified in nepds. The epd field
1265  * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
1266  * events is a bitmask specifying the events which the application is
1267  * interested in. The field revents is an output parameter, filled by the
1268  * kernel with the events that actually occurred. The bits returned in revents
1269  * can include any of those specified in events, or one of the values EPOLLERR,
1270  * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events
1271  * field, and will be set in the revents field whenever the corresponding
1272  * condition is true.)
1273  *
1274  * If none of the events requested (and no error) has occurred for any of the
1275  * endpoint descriptors, then scif_poll() blocks until one of the events occurs.
1276  *
1277  * The timeout argument specifies an upper limit on the time for which
1278  * scif_poll() will block, in milliseconds. Specifying a negative value in
1279  * timeout means an infinite timeout.
1280  *
1281  * The following bits may be set in events and returned in revents.
1282  * EPOLLIN - Data may be received without blocking. For a connected
1283  * endpoint, this means that scif_recv() may be called without blocking. For a
1284  * listening endpoint, this means that scif_accept() may be called without
1285  * blocking.
1286  * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this
1287  * means that scif_send() may be called without blocking. EPOLLOUT may also be
1288  * used to block waiting for a non-blocking connect to complete. This bit value
1289  * has no meaning for a listening endpoint and is ignored if specified.
1290  *
1291  * The following bits are only returned in revents, and are ignored if set in
1292  * events.
1293  * EPOLLERR - An error occurred on the endpoint
1294  * EPOLLHUP - The connection to the peer endpoint was disconnected
1295  * EPOLLNVAL - The specified endpoint descriptor is invalid
1296  *
1297  * Return:
1298  * Upon successful completion, scif_poll() returns a non-negative value. A
1299  * positive value indicates the total number of endpoint descriptors that have
1300  * been selected (that is, endpoint descriptors for which the revents member is
1301  * non-zero). A value of 0 indicates that the call timed out and no endpoint
1302  * descriptors have been selected. Otherwise, in user mode, -1 is returned and
1303  * errno is set to indicate the error; in kernel mode, the negative of one of
1304  * the following errors is returned.
1305  *
1306  * Errors:
1307  * EINTR - A signal occurred before any requested event
1308  * EINVAL - The nepds argument is greater than {OPEN_MAX}
1309  * ENOMEM - There was no space to allocate file descriptor tables
1310  */
1311 int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout);
1312 
1313 /**
1314  * scif_client_register() - Register a SCIF client
1315  * @client:     client, providing probe() and remove() methods, to be registered
1316  *
1317  * scif_client_register() registers a SCIF client. The probe() method
1318  * of the client is called when SCIF peer devices come online, and the
1319  * remove() method is called when the peer devices disappear.
1320  *
1321  * Return:
1322  * Upon successful completion, scif_client_register() returns a non-negative
1323  * value. Otherwise, the return value is the same as that of
1324  * subsys_interface_register() in the kernel.
1325  */
1326 int scif_client_register(struct scif_client *client);
1327 
1328 /**
1329  * scif_client_unregister() - Unregister a SCIF client
1330  * @client:     client to be unregistered
1331  *
1332  * scif_client_unregister() unregisters a SCIF client.
1333  *
1334  * Return:
1335  * None; the function is declared void and reports no status to the caller.
1336  */
1337 void scif_client_unregister(struct scif_client *client);
1338 
1339 #endif /* __SCIF_H__ */

/* [<][>][^][v][top][bottom][index][help] */