diff options
Diffstat (limited to 'src/vcl/vcom_socket.c')
-rw-r--r-- | src/vcl/vcom_socket.c | 3443 |
1 files changed, 3443 insertions, 0 deletions
diff --git a/src/vcl/vcom_socket.c b/src/vcl/vcom_socket.c new file mode 100644 index 00000000000..8806ebd011a --- /dev/null +++ b/src/vcl/vcom_socket.c @@ -0,0 +1,3443 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <unistd.h> +#include <stdio.h> +#include <sys/uio.h> +#include <limits.h> +#define __need_IOV_MAX +#include <bits/stdio_lim.h> +#include <netinet/tcp.h> + +#include <vppinfra/types.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> + +#include <vcl/vcom_socket.h> +#include <vcl/vcom_socket_wrapper.h> +#include <vcl/vcom.h> + +#include <vcl/vppcom.h> + + +/* + * VCOM_SOCKET Private definitions and functions. + */ + +typedef struct vcom_socket_main_t_ +{ + u8 init; + + /* vcom_socket pool */ + vcom_socket_t *vsockets; + + /* Hash table for socketidx to fd mapping */ + uword *sockidx_by_fd; + + /* vcom_epoll pool */ + vcom_epoll_t *vepolls; + + /* Hash table for epollidx to epfd mapping */ + uword *epollidx_by_epfd; + + + /* common epitem poll for all epfd */ + /* TBD: epitem poll per epfd */ + /* vcom_epitem pool */ + vcom_epitem_t *vepitems; + + /* Hash table for epitemidx to epfdfd mapping */ + uword *epitemidx_by_epfdfd; + + /* Hash table - key:epfd, value:vec of epitemidx */ + uword *epitemidxs_by_epfd; + /* Hash table - key:fd, value:vec of epitemidx */ + uword *epitemidxs_by_fd; + +} vcom_socket_main_t; + +vcom_socket_main_t vcom_socket_main; + + +static int +vcom_socket_open_socket (int domain, int type, int protocol) +{ + int rv = -1; + + /* handle domains implemented by vpp */ + switch (domain) + { + case AF_INET: + case AF_INET6: + /* get socket type and + * handle the socket types supported by vpp */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + /* the type argument serves a second purpose, + * in addition to specifying a socket type, + * it may include the bitwise OR of any of + * SOCK_NONBLOCK and SOCK_CLOEXEC, to modify + * the behavior of socket. */ + rv = libc_socket (domain, type, protocol); + if (rv == -1) + rv = -errno; + break; + + default: + break; + } + + break; + + default: + break; + } + + return rv; +} + +static int +vcom_socket_open_epoll (int flags) +{ + int rv = -1; + + if (flags < 0) + { + return -EINVAL; + } + if (flags && (flags & ~EPOLL_CLOEXEC)) + { + return -EINVAL; + } + + /* flags can be either zero or EPOLL_CLOEXEC */ + rv = libc_epoll_create1 (flags); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_socket (int fd) +{ + int rv; + + rv = libc_close (fd); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_epoll (int epfd) +{ + int rv; + + rv = libc_close (epfd); + if (rv == -1) + rv = -errno; + + return rv; +} + +/* + * Public API functions + */ + + +int +vcom_socket_is_vcom_fd (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +int +vcom_socket_is_vcom_epfd (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +static inline int +vcom_socket_get_sid (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return vsock->sid; + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return vepoll->vep_idx; + } + return INVALID_VEP_IDX; +} + +static inline int +vcom_socket_get_sid_and_vsock (int fd, vcom_socket_t ** vsockp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + *vsockp = vsock; + return vsock->sid; + } + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx_and_vepoll (int epfd, vcom_epoll_t ** vepollp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + *vepollp = vepoll; + return vepoll->vep_idx; + } + } + return INVALID_VEP_IDX; +} + + +static int +vcom_socket_close_vepoll (int epfd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + if (!p) + return -EBADF; + + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (!vepoll) + return -EBADF; + + if (vepoll->type != EPOLL_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (vepoll->count) + { + if (!vepoll->close) + { + vepoll->close = 1; + return 0; + } + else + { + return -EBADF; + } + } + + /* count is zero */ + rv = vppcom_session_close (vepoll->vep_idx); + rv = vcom_socket_close_epoll (vepoll->epfd); + + vepoll_init (vepoll); + hash_unset (vsm->epollidx_by_epfd, epfd); + pool_put (vsm->vepolls, vepoll); + + return rv; +} + +static int +vcom_socket_close_vsock (int fd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vcom_epitem_t *vepitem; + + i32 *vepitemidxs = 0; + i32 *vepitemidxs_var = 0; + + p = hash_get (vsm->sockidx_by_fd, fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + rv = vppcom_session_close (vsock->sid); + rv = vcom_socket_close_socket (vsock->fd); + + vsocket_init (vsock); + hash_unset (vsm->sockidx_by_fd, fd); + pool_put (vsm->vsockets, vsock); + + /* + * NOTE: + * Before calling close(), user should remove + * this fd from the epoll-set of all epoll instances, + * otherwise resource(epitems) leaks ensues. + */ + + /* + * 00. close all epoll instances that are marked as "close" + * of which this fd is the "last" remaining member. + * 01. epitems associated with this fd are intentionally + * not removed, see NOTE: above. + * */ + + /* does this fd participate in epoll */ + p = hash_get (vsm->epitemidxs_by_fd, fd); + if (p) + { + vepitemidxs = *(i32 **) p; + vec_foreach (vepitemidxs_var, vepitemidxs) + { + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidxs_var[0]); + if (vepitem && vepitem->fd == fd && + vepitem->type == FD_TYPE_VCOM_SOCKET) + { + i32 vep_idx; + vcom_epoll_t *vepoll; + if ((vep_idx = + vcom_socket_get_vep_idx_and_vepoll (vepitem->epfd, + &vepoll)) != + INVALID_VEP_IDX) + { + if (vepoll->close) + { + if (vepoll->count == 1) + { + /* + * force count to zero and + * close this epoll instance + * */ + vepoll->count = 0; + vcom_socket_close_vepoll (vepoll->epfd); + } + else + { + vepoll->count -= 1; + } + } + } + } + + } + } + + return rv; +} + +int +vcom_socket_close (int __fd) +{ + int rv; + + if (vcom_socket_is_vcom_fd (__fd)) + { + rv = vcom_socket_close_vsock (__fd); + } + else if (vcom_socket_is_vcom_epfd (__fd)) + { + rv = vcom_socket_close_vepoll (__fd); + } + else + { + rv = -EBADF; + } + + return rv; +} + +ssize_t +vcom_socket_read (int __fd, void *__buf, size_t __nbytes) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf) + { + return -EINVAL; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + } + while (rv == -EAGAIN || rv == -EWOULDBLOCK); + return rv; + } + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the read would + * block. + * */ + /* is non blocking */ + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + return rv; +} + +ssize_t +vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + ssize_t total = 0, len = 0; + int i; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + /* Sanity check */ + for (i = 0; i < __iovcnt; ++i) + { + if (SSIZE_MAX - len < __iov[i].iov_len) + return -EINVAL; + len += __iov[i].iov_len; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + for (i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + break; + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + } + while ((rv == -EAGAIN || rv == -EWOULDBLOCK) && total == 0); + return total; + } + + /* is non blocking */ + for (i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + { + errno = rv; + return rv; + } + } + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + return total; +} + +ssize_t +vcom_socket_write (int __fd, const void *__buf, size_t __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + if (!__buf) + { + return -EINVAL; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + rv = vppcom_session_write (vsock->sid, (void *) __buf, __n); + return rv; +} + +ssize_t +vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv = -1; + ssize_t total = 0; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + int i; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + for (i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_write (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + return rv; + } + else + total += rv; + } + return total; +} + +/* + * RETURN: 0 - invalid cmd + * 1 - cmd not handled by vcom and vppcom + * 2 - cmd handled by vcom socket resource + * 3 - cmd handled by vppcom + * */ +/* TBD: incomplete list of cmd */ +static int +vcom_socket_check_fcntl_cmd (int __cmd) +{ + switch (__cmd) + { + /*cmd not handled by vcom and vppcom */ + /* Fallthrough */ + case F_DUPFD: + case F_DUPFD_CLOEXEC: + return 1; + + /* cmd handled by vcom socket resource */ + /* Fallthrough */ + case F_GETFD: + case F_SETFD: + case F_GETLK: + case F_SETLK: + case F_SETLKW: + case F_GETOWN: + case F_SETOWN: + return 2; + + /* cmd handled by vcom and vppcom */ + case F_SETFL: + case F_GETFL: + return 3; + + /* cmd not handled by vcom and vppcom */ + default: + return 1; + } + return 0; +} + +static inline int +vcom_session_fcntl_va (int __sid, int __cmd, va_list __ap) +{ + int flags = va_arg (__ap, int); + int rv = -EOPNOTSUPP; + uint32_t size; + + size = sizeof (flags); + if (__cmd == F_SETFL) + { + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + } + else if (__cmd == F_GETFL) + { + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_FLAGS, &flags, &size); + if (rv == VPPCOM_OK) + rv = flags; + } + + return rv; +} + +int +vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + int rv = -EBADF; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + switch (vcom_socket_check_fcntl_cmd (__cmd)) + { + /* invalid cmd */ + case 0: + rv = -EBADF; + break; + /*cmd not handled by vcom and vppcom */ + case 1: + rv = libc_vfcntl (vsock->fd, __cmd, __ap); + break; + /* cmd handled by vcom socket resource */ + case 2: + rv = libc_vfcntl (vsock->fd, __cmd, __ap); + break; + /* cmd handled by vppcom */ + case 3: + rv = vcom_session_fcntl_va (vsock->sid, __cmd, __ap); + break; + + default: + rv = -EINVAL; + break; + } + + return rv; +} + +/* + * RETURN: 0 - invalid cmd + * 1 - cmd not handled by vcom and vppcom + * 2 - cmd handled by vcom socket resource + * 3 - cmd handled by vppcom + */ +static int +vcom_socket_check_ioctl_cmd (unsigned long int __cmd) +{ + int rc; + + switch (__cmd) + { + /* cmd handled by vppcom */ + case FIONREAD: + rc = 3; + break; + + /* cmd not handled by vcom and vppcom */ + default: + rc = 1; + break; + } + return rc; +} + +static inline int +vcom_session_ioctl_va (int __sid, int __cmd, va_list __ap) +{ + int rv; + + if (__cmd == FIONREAD) + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_NREAD, 0, 0); + else + rv = -EOPNOTSUPP; + return rv; +} + +int +vcom_socket_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap) +{ + int rv = -EBADF; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + switch (vcom_socket_check_ioctl_cmd (__cmd)) + { + /* Not supported cmd */ + case 0: + rv = -EOPNOTSUPP; + break; + + /* cmd not handled by vcom and vppcom */ + case 1: + rv = libc_vioctl (vsock->fd, __cmd, __ap); + break; + + /* cmd handled by vcom socket resource */ + case 2: + rv = libc_vioctl (vsock->fd, __cmd, __ap); + break; + + /* cmd handled by vppcom */ + case 3: + rv = vcom_session_ioctl_va (vsock->sid, __cmd, __ap); + break; + + default: + rv = -EINVAL; + break; + } + + return rv; +} + +static inline int +vcom_socket_fds_2_sid_fds ( + /* dest */ + int *vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds, + /* src */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_sid is -1 */ + int max_sid = -1; + int nsid = 0; + + /* + * set sid in sid sets corresponding to fd's in fd sets + * compute nsid and vcom_nsid_fds from sid sets + */ + + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, src + * S sid set, dest + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + FD_SET (sid, (S)); \ + if (sid > max_sid) \ + { \ + max_sid = sid; \ + } \ + ++nsid; \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0; + rv = nsid; + +done: + return rv; +} + +/* + * PRE: 00. sid sets were derived from fd sets + * 01. sid sets were updated with sids that actually changed + * status + * 02. fd sets still has watched fds + * + * This function will modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ +static inline int +vcom_socket_sid_fds_2_fds ( + /* dest */ + int *new_vcom_nfds, + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + /* src */ + int vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* + * modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, dest + * S sid set, src + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + if (!FD_ISSET (sid, (S))) \ + { \ + FD_CLR(fd, (F)); \ + } \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + /* + * compute nfd and new_vcom_nfds from fd sets + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(vcom_readfds); + _(vcom_writefds); + _(vcom_exceptfds); +#undef _ + + } + + *new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0; + rv = nfd; + +done: + return rv; +} + +/* + * PRE: + * vom_socket_select is always called with + * timeout->tv_sec and timeout->tv_usec set to zero. + * hence vppcom_select return immediately. + */ +/* + * TBD: do{body;} while(timeout conditional); timeout loop + */ +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + static unsigned long vcom_nsid_fds = 0; + int vcom_nsid = 0; + int rv = -EBADF; + pid_t pid = getpid (); + + int new_vcom_nfds = 0; + int new_vcom_nfd = 0; + + /* vcom sid fds */ + fd_set vcom_rd_sid_fds; + fd_set vcom_wr_sid_fds; + fd_set vcom_ex_sid_fds; + + /* in seconds eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + /* validate inputs */ + if (vcom_nfds < 0) + { + return -EINVAL; + } + + /* convert timeval timeout to double time_to_wait */ + if (timeout) + { + if (timeout->tv_sec == 0 && timeout->tv_usec == 0) + { + /* polling: vppcom_select returns immediately */ + time_to_wait = (double) 0; + } + else + { + /*TBD: use timeval api */ + time_to_wait = (double) timeout->tv_sec + + (double) timeout->tv_usec / (double) 1000000 + + (double) (timeout->tv_usec % 1000000) / (double) 1000000; + } + } + else + { + /* + * no timeout: vppcom_select can block indefinitely + * waiting for a file descriptor to become ready + * */ + /* set to a phantom value */ + time_to_wait = ~0; + } + + /* zero the sid_sets */ + /* + * F fd set + * S sid set + */ +#define _(S,F) \ + if ((F)) \ + { \ + FD_ZERO ((S)); \ + } + + + _(&vcom_rd_sid_fds, vcom_readfds); + _(&vcom_wr_sid_fds, vcom_writefds); + _(&vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + + if (vcom_nfds == 0) + { + if (time_to_wait > 0) + { + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_socket_select called to " + "emulate delay_ns()!\n", pid); + rv = vppcom_select (0, NULL, NULL, NULL, time_to_wait); + } + else + { + fprintf (stderr, "[%d] vcom_socket_select called vcom_nfds = 0 " + "and invalid time_to_wait (%f)!\n", pid, time_to_wait); + } + return 0; + } + + /* populate read, write and except sid_sets */ + vcom_nsid = vcom_socket_fds_2_sid_fds ( + /* dest */ + vcom_readfds || vcom_writefds + || vcom_exceptfds ? (int *) + &vcom_nsid_fds : NULL, + vcom_readfds ? &vcom_rd_sid_fds : + NULL, + vcom_writefds ? &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? &vcom_ex_sid_fds : + NULL, + /* src */ + vcom_nfds, + vcom_readfds, + vcom_writefds, vcom_exceptfds); + if (vcom_nsid < 0) + { + return vcom_nsid; + } + + rv = vppcom_select (vcom_nsid_fds, + vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds : + NULL, + vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? (unsigned long *) &vcom_ex_sid_fds : + NULL, time_to_wait); + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] called vppcom_select(): " + "'%04d'='%04d'\n", pid, rv, (int) vcom_nsid_fds); + + /* check if any file descriptors changed status */ + if (rv > 0) + { + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + + /* + * comply with pre-condition + * do not clear vcom fd sets befor calling + * vcom_socket_sid_fds_2_fds + */ + new_vcom_nfd = vcom_socket_sid_fds_2_fds ( + /* dest */ + &new_vcom_nfds, + vcom_nfds, + vcom_readfds, + vcom_writefds, + vcom_exceptfds, + /* src */ + vcom_nsid_fds, + vcom_readfds ? + &vcom_rd_sid_fds : NULL, + vcom_writefds ? + &vcom_wr_sid_fds : NULL, + vcom_exceptfds ? + &vcom_ex_sid_fds : NULL); + if (new_vcom_nfd < 0) + { + return new_vcom_nfd; + } + if (new_vcom_nfds < 0) + { + return -EINVAL; + } + rv = new_vcom_nfd; + } + return rv; +} + + +int +vcom_socket_socket (int __domain, int __type, int __protocol) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sid; + i32 sockidx; + u8 is_nonblocking = __type & SOCK_NONBLOCK ? 1 : 0; + int type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC); + + fd = vcom_socket_open_socket (__domain, __type, __protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + sid = vppcom_session_create (VPPCOM_VRF_DEFAULT, + (type == SOCK_DGRAM) ? + VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP, + is_nonblocking); + if (sid < 0) + { + rv = sid; + goto out_close_socket; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out_close_socket: + vcom_socket_close_socket (fd); +out: + return rv; +} + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]) +{ +/* TBD: */ + return 0; +} + +int +vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr) + { + return -EINVAL; + } + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_bind (vsock->sid, &ep); + return rv; +} + +static inline int +vcom_session_getsockname (int sid, vppcom_endpt_t * ep) +{ + int rv; + uint32_t size = sizeof (*ep); + + rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_LCL_ADDR, ep, &size); + return rv; +} + +int +vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vcom_session_getsockname (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } + + return rv; +} + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_connect (vsock->sid, &ep); + } + return rv; +} + +static inline int +vcom_session_getpeername (int sid, vppcom_endpt_t * ep) +{ + int rv; + uint32_t size = sizeof (*ep); + + rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_PEER_ADDR, ep, &size); + return rv; +} + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vcom_session_getpeername (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } + + return rv; +} + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + return vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0); +} + +ssize_t +vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + int rv = -1; + rv = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0); + return rv; +} + +/* + * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int +vcom_socket_is_connection_mode_socket (int __fd) +{ + int rv = -1; + /* TBD define new vppcom api */ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int type; + socklen_t optlen; + + p = hash_get (vsm->sockidx_by_fd, __fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + optlen = sizeof (type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &type, &optlen); + if (rv != 0) + { + return 0; + } + /* get socket type */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_SEQPACKET: + return 1; + break; + + default: + return 0; + break; + } + } + } + return 0; +} + +static inline ssize_t +vcom_session_sendto (int __sid, void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + /* TBD add new vpp api */ + /* TBD add flags parameter */ + rv = vppcom_session_write (__sid, (void *) __buf, (int) __n); + return rv; +} + +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + if (!__buf) + { + return -EINVAL; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + { + return -EINVAL; + } + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__addr != NULL) || (__addr_len != 0)) + { + return -EISCONN; + } + } + else + { + if (!__addr) + { + return -EDESTADDRREQ; + } + /* not a vppcom supported address family */ + if ((__addr->sa_family != AF_INET) || (__addr->sa_family != AF_INET6)) + { + return -EINVAL; + } + } + + rv = vcom_session_sendto (vsock->sid, (void *) __buf, (int) __n, + __flags, __addr, __addr_len); + return rv; +} + +static inline ssize_t +vcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + + /* TBD add flags parameter */ + rv = vppcom_session_read (__sid, __buf, __n); + return rv; +} + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + if (!__buf || !__addr || !__addr_len) + { + return -EINVAL; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + { + return -EINVAL; + } + + rv = vcom_session_recvfrom (vsock->sid, __buf, __n, + __flags, __addr, __addr_len); + return rv; +} + +/* TBD: move it to vppcom */ +static inline ssize_t +vcom_session_sendmsg (int __sid, const struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_write (__sid, (void *) __message->__buf, + (int)__n); */ + return rv; +} + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__message->msg_name != NULL) || (__message->msg_namelen != 0)) + { + return -EISCONN; + } + } + else + { + /* TBD: validate __message->msg_name and __message->msg_namelen + * and return -EINVAL on validation error + * */ + ; + } + + rv = vcom_session_sendmsg (vsock->sid, __message, __flags); + + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static inline ssize_t +vcom_session_recvmsg (int __sid, struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_read (__sid, (void *) __message->__buf, + (int)__n); */ + rv = -EOPNOTSUPP; + return rv; +} + +ssize_t +vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__message) + { + return -EINVAL; + } + + /* validate __flags */ + + rv = vcom_session_recvmsg (vsock->sid, __message, __flags); + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo) +{ + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static inline int +vcom_session_get_sockopt (int __sid, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen) +{ + /* 1. for socket level options that are NOT socket attributes + * and that has corresponding vpp options get from vppcom */ + return 0; +} + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__optval && !__optlen) + return -EFAULT; + + switch (__level) + { + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { +/* + * 1. for socket level options that are socket attributes, + * get from libc_getsockopt. + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * get from vppcom. + * 3. for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_KEEPALIVE: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + case SO_BINDTODEVICE: + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: +#ifdef SO_INCOMING_CPU + case SO_INCOMING_CPU: +#endif + rv = libc_getsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + /* 1. handle options that are NOT socket level options, + * but have corresponding vpp otions. */ + rv = vcom_session_get_sockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + } + + return rv; +} + +/* TBD: move it to vppcom */ +static inline int +vcom_session_setsockopt (int __sid, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv = -EOPNOTSUPP; + + switch (__level) + { + case SOL_TCP: + switch (__optname) + { + case TCP_KEEPIDLE: + rv = + vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPIDLE, 0, 0); + break; + case TCP_KEEPINTVL: + rv = + vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPINTVL, 0, 0); + break; + default: + break; + } + break; + case SOL_IPV6: + switch (__optname) + { + case IPV6_V6ONLY: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_V6ONLY, 0, 0); + break; + default: + break; + } + break; + case SOL_SOCKET: + switch (__optname) + { + case SO_KEEPALIVE: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_KEEPALIVE, 0, 0); + break; + case SO_REUSEADDR: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_REUSEADDR, 0, 0); + break; + case SO_BROADCAST: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_BROADCAST, 0, 0); + break; + default: + break; + } + break; + default: + break; + } + + return rv; +} + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + /* + * Options without arguments + */ + + if (__optname == SO_BINDTODEVICE) + { + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + } + return rv; + } + + if (!__optval) + return -EFAULT; + + if (__optlen < sizeof (int)) + return -EINVAL; + + switch (__level) + { + case SOL_IPV6: + switch (__optname) + { + case IPV6_V6ONLY: + rv = vcom_session_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + default: + return -EOPNOTSUPP; + } + break; + case SOL_TCP: + switch (__optname) + { + case TCP_NODELAY: + return 0; + case TCP_KEEPIDLE: + case TCP_KEEPINTVL: + rv = vcom_session_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + default: + return -EOPNOTSUPP; + } + break; + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { + case SO_REUSEADDR: + case SO_BROADCAST: + case SO_KEEPALIVE: + rv = vcom_session_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + + /* + * 1. for socket level options that are socket attributes, + * set it from libc_getsockopt + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * set it from vppcom + * 3. for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEPORT: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + /* + * SO_BINDTODEVICE already handled as + * "Options without arguments" */ + /* case SO_BINDTODEVICE: */ + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: +#ifdef SO_INCOMING_CPU + case SO_INCOMING_CPU: +#endif + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + return -ENOPROTOOPT; + } + + return rv; +} + +int +vcom_socket_listen (int __fd, int __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + /* TBD vppcom to accept __n parameter */ + rv = vppcom_session_listen (vsock->sid, __n); + } + + return rv; +} + +static int +vcom_socket_connected_socket (int __fd, int __sid, + int *__domain, + int *__type, int *__protocol, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sockidx; + + socklen_t optlen; + + optlen = sizeof (*__domain); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__protocol); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, __sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out: + return rv; +} + +/* If flag is 0, then accept4() is the same as accept(). + * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags + */ +static int +vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int fd; + int sid; + int domain; + int type; + int protocol; + + uint8_t addr8[sizeof (struct in6_addr)]; + vppcom_endpt_t ep; + + ep.ip = addr8; + + /* validate flags */ + + /* + * for documentation + * switch (flags) + * { + * case 0: + * case SOCK_NONBLOCK: + * case SOCK_CLOEXEC: + * case SOCK_NONBLOCK | SOCK_CLOEXEC: + * break; + * + * default: + * return -1; + * } + */ + /* flags can be 0 or can be bitwise OR + * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */ + + if (!(!flags || (flags & (SOCK_NONBLOCK | SOCK_CLOEXEC)))) + { + /* TBD: return proper error code */ + return -1; + } + + /* TBD: return proper error code */ + + if (!vcom_socket_is_connection_mode_socket (__fd)) + { + return -EOPNOTSUPP; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + + rv = vcom_fcntl (vsock->fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + /* socket is not marked as nonblocking + * and no pending connections are present + * on the queue, accept () blocks the caller + * until a connection is present. + */ + rv = vppcom_session_accept (vsock->sid, &ep, + -1.0 /* wait forever */ ); + } + else + { + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the accept would + * block. + * */ + /* is non blocking */ + rv = vppcom_session_accept (vsock->sid, &ep, 0); + /* If the socket is marked nonblocking and + * no pending connections are present on the + * queue, accept fails with the error + * EAGAIN or EWOULDBLOCK + */ + if (rv == VPPCOM_ETIMEDOUT) + { + rv = VPPCOM_EAGAIN; + } + } + if (rv < 0) + { + return rv; + } + + sid = rv; + + /* create a new connected socket resource and set flags + * on the new file descriptor. + * update vsockets and sockidx_by_fd table + * */ + fd = vcom_socket_connected_socket (__fd, sid, + &domain, &type, &protocol, flags); + if (fd < 0) + { + return fd; + } + + rv = fd; + + /* TBD populate __addr and __addr_len */ + /* TBD: The returned address is truncated if the buffer + * provided is too small, in this case, __addr_len will + * return a value greater than was supplied to the call.*/ + if (__addr) + { + if (ep.is_cut_thru) + { + /* TBD populate __addr and __addr_len */ + switch (domain) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. + __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + else + { + switch (ep.is_ip4) + { + case VPPCOM_IS_IP4: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case VPPCOM_IS_IP6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. + __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + } + else + { + /* when __addr is NULL, nothing is filled in, + * in this case, __addr_len is not used, + * and should also be null + * */ + if (__addr_len) + { + /* TBD: return proper error code */ + return -1; + } + } + } + + return rv; +} + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + /* set flags to 0 for accept() */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, 0); +} + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + /* SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags); +} +#endif + +/* TBD: move it to vppcom */ +static inline int +vcom_session_shutdown (int __fd, int __how) +{ + return 0; +} + +int +vcom_socket_shutdown (int __fd, int __how) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + switch (__how) + { + case SHUT_RD: + case SHUT_WR: + case SHUT_RDWR: + rv = vcom_session_shutdown (vsock->sid, __how); + return rv; + break; + + default: + return -EINVAL; + break; + } + } + + return rv; +} + +int +vcom_socket_epoll_create1 (int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epoll_t *vepoll; + + i32 epfd; + i32 vep_idx; + i32 epollidx; + + epfd = vcom_socket_open_epoll (__flags); + if (epfd < 0) + { + rv = epfd; + goto out; + } + + vep_idx = vppcom_epoll_create (); + if (vep_idx < 0) + { + rv = vep_idx; + goto out_close_epoll; + } + + pool_get (vsm->vepolls, vepoll); + vepoll_init (vepoll); + + epollidx = vepoll - vsm->vepolls; + hash_set (vsm->epollidx_by_epfd, epfd, epollidx); + + vepoll_set (vepoll, epfd, vep_idx, EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0); + + return epfd; + +out_close_epoll: + vcom_socket_close_epoll (epfd); +out: + return rv; +} + +/* + * PRE: vppcom_epoll_ctl() is successful + * free_vepitem_on_del : 0 - no_pool_put, 1 - pool_put + */ +int +vcom_socket_ctl_vepitem (int __epfd, int __op, int __fd, + struct epoll_event *__event, + i32 vep_idx, vcom_epoll_t * vepoll, + i32 vfd_id, void *vfd, vcom_fd_type_t type, + int free_vepitem_on_del) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epitem_t *vepitem; + + vcom_epitem_key_t epfdfd = {.epfd = __epfd,.fd = __fd }; + uword *p; + i32 vepitemidx; + + i32 *vepitemidxs = 0; + + struct epoll_event revent = {.events = 0,.data.fd = INVALID_FD }; + + i32 vec_idx; + + /* perform control operations on the epoll instance */ + switch (__op) + { + case EPOLL_CTL_ADD: + /* + * supplied file descriptor is already + * registered with this epoll instance + * */ + /* vepitem exists */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (p) + { + rv = -EEXIST; + goto out; + } + + /* add a new vepitem */ + pool_get (vsm->vepitems, vepitem); + vepitem_init (vepitem); + + vepitemidx = vepitem - vsm->vepitems; + hash_set (vsm->epitemidx_by_epfdfd, epfdfd.key, vepitemidx); + vepitem_set (vepitem, __epfd, __fd, __fd, __fd, type, *__event, revent); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs, 0); + } + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_fd, __fd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_fd, __fd, vepitemidxs, 0); + } + + /* increment vepoll fd count by 1 */ + vepoll->count += 1; + + rv = 0; + goto out; + break; + + case EPOLL_CTL_MOD: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitem = pool_elt_at_index (vsm->vepitems, p[0]); + if (vepitem) + { + vepitem->event = *__event; + vepitem->revent = revent; + } + + rv = 0; + goto out; + break; + + case EPOLL_CTL_DEL: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitemidx = *(i32 *) p; + hash_unset (vsm->epitemidx_by_epfdfd, epfdfd.key); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if (vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if (!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_epfd, __epfd); + } + } + } + + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if (vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if (!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_fd, __fd); + } + } + } + + /* pool put vepitem */ + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidx); + if (free_vepitem_on_del) + { + if (!vepitem) + { + rv = -ENOENT; + goto out; + } + vepitem_init (vepitem); + pool_put (vsm->vepitems, vepitem); + } + else + { + if (!vepitem) + { + vepitem_init (vepitem); + } + } + + /* decrement vepoll fd count by 1 */ + vepoll->count -= 1; + + rv = 0; + goto out; + break; + + default: + rv = -EINVAL; + goto out; + break; + } + +out: + return rv; +} + +/* + * PRE: 00. null pointer check on __event + * 01. all other parameters are validated + */ + +static int +vcom_socket_epoll_ctl_internal (int __epfd, int __op, int __fd, + struct epoll_event *__event, + int free_vepitem_on_del) +{ + int rv = -1; + + /* vcom_socket_main_t *vsm = &vcom_socket_main; */ + vcom_epoll_t *vepoll; + + /*__fd could could be vcom socket or vcom epoll or kernel fd */ + void *vfd; + vcom_epoll_t *vfd_vepoll; + vcom_socket_t *vfd_vsock; + + i32 vep_idx; + i32 vfd_id; + + vcom_fd_type_t type = FD_TYPE_INVALID; + + /* validate __event */ + + /* get vep_idx and vepoll */ + vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll); + if (vep_idx == INVALID_VEP_IDX) + { + return -EBADF; + } + + /* get vcom fd type, vfd_id and vfd */ + vfd_id = vcom_socket_get_sid_and_vsock (__fd, &vfd_vsock); + if (vfd_id != INVALID_SESSION_ID) + { + type = FD_TYPE_VCOM_SOCKET; + vfd = vfd_vsock; + } + else if ((vfd_id = vcom_socket_get_vep_idx_and_vepoll (__fd, &vfd_vepoll)) + != INVALID_VEP_IDX) + { + type = FD_TYPE_EPOLL; + vfd = vfd_vepoll; + } + else + { + /* FD_TYPE_KERNEL not supported by epoll instance */ + type = FD_TYPE_INVALID; + return -EBADF; + } + + + /* vepoll and vsock are now valid */ + rv = vppcom_epoll_ctl (vep_idx, __op, vfd_id, __event); + if (rv < 0) + { + return rv; + } + + rv = vcom_socket_ctl_vepitem (__epfd, __op, __fd, + __event, + vep_idx, vepoll, + vfd_id, vfd, type, free_vepitem_on_del); + return rv; +} + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 1); + return rv; +} + +static int +vcom_socket_epoll_ctl1 (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 0); + return rv; +} + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss) +{ + int rv = -EBADF; + + /* in seconds eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + i32 vep_idx; + + /* validate __event */ + if (!__events) + { + rv = -EFAULT; + goto out; + } + + /* validate __timeout */ + if (__timeout > 0) + { + time_to_wait = (double) __timeout / (double) 1000; + } + else if (__timeout == 0) + { + time_to_wait = (double) 0; + } + else if (__timeout == -1) + { + time_to_wait = ~0; + } + else + { + rv = -EBADF; + goto out; + } + + /* get vep_idx */ + vep_idx = vcom_socket_get_vep_idx (__epfd); + if (vep_idx != INVALID_VEP_IDX) + { + rv = vppcom_epoll_wait (vep_idx, __events, __maxevents, time_to_wait); + } +out: + return rv; +} + +static inline void +vcom_pollfds_2_selectfds ( + /* src */ + struct pollfd *__fds, nfds_t __nfds, + /* dest */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds) +{ + nfds_t fds_idx = 0; + + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + continue; + } + + /* for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL */ + FD_SET (__fds[fds_idx].fd, vcom_exceptfds); + + /* requested events */ + if (__fds[fds_idx].events) + { + if (__fds[fds_idx].events & POLLIN) + { + FD_SET (__fds[fds_idx].fd, vcom_readfds); + } + if (__fds[fds_idx].events & POLLPRI) + { + FD_SET (__fds[fds_idx].fd, vcom_readfds); + } + if (__fds[fds_idx].events & POLLOUT) + { + FD_SET (__fds[fds_idx].fd, vcom_writefds); + } +#if defined __USE_XOPEN || defined __USE_XOPEN2K8 + if (__fds[fds_idx].events & POLLRDNORM) + { + FD_SET (__fds[fds_idx].fd, vcom_readfds); + } + if (__fds[fds_idx].events & POLLRDBAND) + { + FD_SET (__fds[fds_idx].fd, vcom_readfds); + } + if (__fds[fds_idx].events & POLLWRNORM) + { + FD_SET (__fds[fds_idx].fd, vcom_writefds); + } + if (__fds[fds_idx].events & POLLWRBAND) + { + FD_SET (__fds[fds_idx].fd, vcom_writefds); + } +#endif + } + } /* for (fds_idx = 0; fds_idx < __nfds; fds_idx++) */ +} + +static inline void +vcom_selectfds_2_pollfds ( + /* dest */ + struct pollfd *__fds, nfds_t __nfds, int *nfd, + /* src */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds) +{ + nfds_t fds_idx = 0; + + + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + __fds[fds_idx].revents = 0; + } + + /* for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL */ + if (FD_ISSET (__fds[fds_idx].fd, vcom_exceptfds)) + { + /* + * TBD: for now any select exception + * is flagged as POLLERR + * */ + __fds[fds_idx].revents |= POLLERR; + } + + /* requested events */ + if (__fds[fds_idx].events & POLLIN) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds)) + { + __fds[fds_idx].revents |= POLLIN; + } + } + if (__fds[fds_idx].events & POLLPRI) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds)) + { + __fds[fds_idx].revents |= POLLIN; + } + } + if (__fds[fds_idx].events & POLLOUT) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds)) + { + __fds[fds_idx].revents |= POLLOUT; + } + } +#if defined __USE_XOPEN || defined __USE_XOPEN2K8 + if (__fds[fds_idx].events & POLLRDNORM) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds)) + { + __fds[fds_idx].revents |= POLLRDNORM; + } + } + if (__fds[fds_idx].events & POLLRDBAND) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds)) + { + __fds[fds_idx].revents |= POLLRDBAND; + } + } + if (__fds[fds_idx].events & POLLWRNORM) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds)) + { + __fds[fds_idx].revents |= POLLWRNORM; + } + } + if (__fds[fds_idx].events & POLLWRBAND) + { + if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds)) + { + __fds[fds_idx].revents |= POLLWRBAND; + } + } +#endif + } /* for (fds_idx = 0; fds_idx < __nfds; fds_idx++) */ + + /* + * nfd: + * the number of structures which have nonzero revents fields + * (in other words, those descriptors with events or + * errors reported) + * */ + *nfd = 0; + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + continue; + } + + if (__fds[fds_idx].revents) + { + (*nfd)++; + } + } +} + +/* + * PRE: parameters are validated, + * vcom_socket_poll is always called with __timeout set to zero + * hence returns immediately + * + * ACTION: handle non negative validated vcom fds and ignore rest + */ + +/* + * implements vcom_socket_poll () interface + * + * internally uses vcom_socket_select () + * to realize the behavior + * */ +int +vcom_socket_poll_select_impl (struct pollfd *__fds, nfds_t __nfds, + int __timeout) +{ + int rv; + pid_t pid = getpid (); + + nfds_t fds_idx = 0; + int nfd = 0; + + /* vcom */ + int vcom_nfds = 0; + fd_set vcom_readfds; + fd_set vcom_writefds; + fd_set vcom_exceptfds; + int vcom_nfd = -1; + /* invalid max_vcom_fd is -1 */ + int max_vcom_fd = -1; + + /* __timeout is zero to get ready events and return immediately */ + struct timeval tv = {.tv_sec = 0,.tv_usec = 0 }; + + /* validate __nfds from select perspective */ + if (__nfds > FD_SETSIZE) + { + rv = -EINVAL; + goto poll_done; + } + + /* zero vcom fd sets */ + /* + * V vcom fd set + */ +#define _(V) \ + FD_ZERO ((V)) + + _(&vcom_readfds); + _(&vcom_writefds); + _(&vcom_exceptfds); +#undef _ + + vcom_nfds = 0; + vcom_nfd = -1; + + + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + continue; + } + + /* non negative validated vcom fds */ + if (__fds[fds_idx].fd > FD_SETSIZE) + { + rv = -EINVAL; + goto poll_done; + } + + /* max_vcom_fd and vcom_nfd */ + if (__fds[fds_idx].fd > max_vcom_fd) + { + /* requested events */ + if (__fds[fds_idx].events) + { + max_vcom_fd = __fds[fds_idx].fd; + } + } + ++vcom_nfd; + } + + vcom_nfds = max_vcom_fd != -1 ? max_vcom_fd + 1 : 0; + + if (!vcom_nfds) + { + rv = vcom_nfds; + goto poll_done; + } + + vcom_pollfds_2_selectfds ( + /* src */ + __fds, __nfds, + /* dest */ + vcom_nfds, + &vcom_readfds, &vcom_writefds, &vcom_exceptfds); + + /* select on vcom fds */ + vcom_nfd = vcom_socket_select (vcom_nfds, + &vcom_readfds, + &vcom_writefds, &vcom_exceptfds, &tv); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] vcom_socket_select: " + "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds); + + if (vcom_nfd < 0) + { + rv = vcom_nfd; + goto poll_done; + } + + vcom_selectfds_2_pollfds ( + /* dest */ + __fds, __nfds, &nfd, + /* src */ + vcom_nfds, + &vcom_readfds, &vcom_writefds, &vcom_exceptfds); + + rv = nfd; + +poll_done: + return rv; +} + +/* + * TBD: remove this static function once vppcom + * has an implementation in place + * + * ACTION: + */ +static int +vppcom_poll (struct pollfd *__fds, nfds_t __nfds, double time_to_wait) +{ + return -EOPNOTSUPP; +} + +int +vcom_socket_poll_vppcom_impl (struct pollfd *__fds, nfds_t __nfds, + int __timeout) +{ + nfds_t fds_idx = 0; + + /* in seconds eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + i32 sid; + i32 vep_idx; + + /* replace vcom fd with session idx */ + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + continue; + } + + /* non negative validated vcom fds */ + sid = vcom_socket_get_sid (__fds[fds_idx].fd); + if (sid != INVALID_SESSION_ID) + { + __fds[fds_idx].fd = sid; + } + else + { + /* get vep_idx */ + vep_idx = vcom_socket_get_vep_idx (__fds[fds_idx].fd); + if (vep_idx != INVALID_VEP_IDX) + { + __fds[fds_idx].fd = vep_idx; + } + else + { + return -EBADF; + } + } + } + + /* validate __timeout */ + if (__timeout > 0) + { + time_to_wait = (double) __timeout / (double) 1000; + } + else if (__timeout == 0) + { + time_to_wait = (double) 0; + } + else if (__timeout < 0) + { + time_to_wait = ~0; + } + else + { + return -EBADF; + } + + return vppcom_poll (__fds, __nfds, time_to_wait); +} + +int +vcom_socket_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + /* select an implementation */ + + /* return vcom_socket_poll_vppcom_impl (__fds, __nfds, __timeout); */ + return vcom_socket_poll_select_impl (__fds, __nfds, __timeout); +} + +#ifdef __USE_GNU +int +vcom_socket_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + return -EOPNOTSUPP; +} +#endif + +int +vcom_socket_main_init (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_init\n"); + + if (!vsm->init) + { + /* TBD: define FD_MAXSIZE and use it here */ + pool_alloc (vsm->vsockets, FD_SETSIZE); + vsm->sockidx_by_fd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepolls, FD_SETSIZE); + vsm->epollidx_by_epfd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepitems, FD_SETSIZE); + vsm->epitemidx_by_epfdfd = hash_create (0, sizeof (i32)); + + vsm->epitemidxs_by_epfd = hash_create (0, sizeof (i32 *)); + vsm->epitemidxs_by_fd = hash_create (0, sizeof (i32 *)); + + vsm->init = 1; + } + + return 0; +} + + +void +vcom_socket_main_show (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs, *vepitemidxs_var; + + if (vsm->init) + { + /* from active list of vsockets show vsock */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + printf( + "fd='%04d', sid='%08x',type='%-30s'\n", + vsock->fd, vsock->sid, + vcom_socket_type_str (vsock->type)); + })); + /* *INDENT-ON* */ + + /* from active list of vepolls, show vepoll */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + printf( + "epfd='%04d', vep_idx='%08x', " + "type='%-30s', " + "flags='%d', count='%d', close='%d'\n", + vepoll->epfd, vepoll->vep_idx, + vcom_socket_epoll_type_str (vepoll->type), + vepoll->flags, vepoll->count, vepoll->close); + })); + /* *INDENT-ON* */ + + /* from active list of vepitems, show vepitem */ + + /* *INDENT-OFF* */ + pool_foreach (vepitem, vsm->vepitems, + ({ + printf( + "epfd='%04d', fd='%04d', " + "next_fd='%04d', prev_fd='%04d', " + "type='%-30s', " + "events='%04x', revents='%04x'\n", + vepitem->epfd, vepitem->fd, + vepitem->next_fd, vepitem->prev_fd, + vcom_socket_vcom_fd_type_str (vepitem->type), + vepitem->event.events, vepitem->revent.events); + })); + + /* *INDENT-ON* */ + + /* show epitemidxs for epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + printf("\n[ '%04d': ", epfd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("]\n"); + })); + /* *INDENT-ON* */ + + /* show epitemidxs for fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + printf("\n{ '%04d': ", fd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("}\n"); + })); + /* *INDENT-ON* */ + + } +} + +void +vcom_socket_main_destroy (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs; + + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_destroy\n"); + + if (vsm->init) + { + + /* + * from active list of vepitems, + * remove all "vepitem" elements from the pool in a safe way + * */ + + /* *INDENT-OFF* */ + pool_flush (vepitem, vsm->vepitems, + ({ + if (vepitem->type == FD_TYPE_EPOLL || FD_TYPE_VCOM_SOCKET) + { + vcom_socket_epoll_ctl1 (vepitem->epfd, EPOLL_CTL_DEL, + vepitem->fd, NULL); + vepitem_init (vepitem); + } + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepitems); + hash_free (vsm->epitemidx_by_epfdfd); + + /* free vepitemidxs for each epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_epfd); + + /* free vepitemidxs for each fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_fd); + + + /* + * from active list of vsockets, + * close socket and vppcom session + * */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vsock->sid); + vcom_socket_close_socket (vsock->fd); + vsocket_init (vsock); + } + })); + /* *INDENT-ON* */ + + /* + * return vsocket element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vsock, vsm->vsockets, + ({ + // vsocket_init(vsock); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vsockets); + hash_free (vsm->sockidx_by_fd); + + /* + * from active list of vepolls, + * close epoll and vppcom_epoll + * */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vepoll->vep_idx); + vcom_socket_close_epoll (vepoll->epfd); /* TBD: */ + vepoll_init (vepoll); + } + })); + /* *INDENT-ON* */ + + /* + * return vepoll element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vepoll, vsm->vepolls, + ({ + // vepoll_init(vepoll); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepolls); + hash_free (vsm->epollidx_by_epfd); + + vsm->init = 0; + } +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |