diff options
author | Dave Wallace <dwallacelf@gmail.com> | 2017-10-24 04:12:18 -0400 |
---|---|---|
committer | Keith Burns <alagalah@gmail.com> | 2017-10-24 18:25:31 +0000 |
commit | 5c7cf1cc5358d137160be1619981e7eea9a7402f (patch) | |
tree | e981f6fc831e9c4e240e765d506ac8d58be43123 /src/vcl | |
parent | 4c151b5fa13312639b0a5ef0d0efb1866bf4ed5e (diff) |
VCL-LDPRELOAD: statically link vppcom into libvcl-ldpreload.so
- Move VCL & VCL-LDPRELOAD source into src/vcl
- Statically link vppcom into libvcl-ldpreload.so
Change-Id: I778300b37e8b06640d9dbc01caf297edf7a6edb7
Signed-off-by: Dave Wallace <dwallacelf@gmail.com>
Diffstat (limited to 'src/vcl')
-rw-r--r-- | src/vcl/sock_test.h | 415 | ||||
-rw-r--r-- | src/vcl/sock_test_client.c | 1076 | ||||
-rw-r--r-- | src/vcl/sock_test_server.c | 790 | ||||
-rw-r--r-- | src/vcl/vcl_test_client.c | 27 | ||||
-rw-r--r-- | src/vcl/vcl_test_server.c | 27 | ||||
-rw-r--r-- | src/vcl/vcom.c | 3334 | ||||
-rw-r--r-- | src/vcl/vcom.h | 204 | ||||
-rw-r--r-- | src/vcl/vcom_glibc_socket.h | 351 | ||||
-rw-r--r-- | src/vcl/vcom_socket.c | 3443 | ||||
-rw-r--r-- | src/vcl/vcom_socket.h | 471 | ||||
-rw-r--r-- | src/vcl/vcom_socket_wrapper.c | 906 | ||||
-rw-r--r-- | src/vcl/vcom_socket_wrapper.h | 235 | ||||
-rw-r--r-- | src/vcl/vppcom.c | 3258 | ||||
-rw-r--r-- | src/vcl/vppcom.h | 171 | ||||
-rw-r--r-- | src/vcl/vppcom_test.conf | 25 |
15 files changed, 14733 insertions, 0 deletions
// diff --git a/src/vcl/sock_test.h b/src/vcl/sock_test.h new file mode 100644 index 00000000000..281ba6fd473 --- /dev/null +++ b/src/vcl/sock_test.h @@ -0,0 +1,415 @@
/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Common definitions for the socket test programs: the test configuration
 * record exchanged over the control socket, per-socket statistics, and
 * read/write helpers that work either on POSIX file descriptors or, when
 * VCL_TEST is defined, on vppcom sessions.
 *
 * NOTE(review): when VCL_TEST is defined this header calls
 * vppcom_session_read()/vppcom_session_write() but does not declare them;
 * presumably the including file pulls in the vppcom header first -- confirm.
 */

#ifndef __sock_test_h__
#define __sock_test_h__

#include <errno.h>
#include <inttypes.h>
#include <netdb.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/* Command tokens recognised in the text typed at the test client. */
#define SOCK_TEST_TOKEN_HELP           "#H"
#define SOCK_TEST_TOKEN_EXIT           "#X"
#define SOCK_TEST_TOKEN_VERBOSE        "#V"
#define SOCK_TEST_TOKEN_TXBUF_SIZE     "#T:"
#define SOCK_TEST_TOKEN_NUM_TEST_SCKTS "#I:"
#define SOCK_TEST_TOKEN_NUM_WRITES     "#N:"
#define SOCK_TEST_TOKEN_RXBUF_SIZE     "#R:"
#define SOCK_TEST_TOKEN_SHOW_CFG       "#C"
#define SOCK_TEST_TOKEN_RUN_UNI        "#U"
#define SOCK_TEST_TOKEN_RUN_BI         "#B"

#define SOCK_TEST_BANNER_STRING \
  "============================================\n"
#define SOCK_TEST_SEPARATOR_STRING \
  " -----------------------------\n"

#define ONE_GIG                        (1024*1024*1024)
#define SOCK_TEST_SERVER_PORT          22000
#define SOCK_TEST_LOCALHOST_IPADDR     "127.0.0.1"

/* Defaults and limits for sock_test_cfg_t. */
#define SOCK_TEST_CFG_CTRL_MAGIC       0xfeedface
#define SOCK_TEST_CFG_NUM_WRITES_DEF   1000000
#define SOCK_TEST_CFG_TXBUF_SIZE_DEF   8192
#define SOCK_TEST_CFG_RXBUF_SIZE_DEF   (64*SOCK_TEST_CFG_TXBUF_SIZE_DEF)
#define SOCK_TEST_CFG_BUF_SIZE_MIN     128
#define SOCK_TEST_CFG_MAX_TEST_SCKTS   5

/* Kind of test requested in sock_test_cfg_t.test. */
typedef enum
{
  SOCK_TEST_TYPE_NONE,
  SOCK_TEST_TYPE_ECHO,
  SOCK_TEST_TYPE_UNI,
  SOCK_TEST_TYPE_BI,
  SOCK_TEST_TYPE_EXIT,
} sock_test_t;

/*
 * Test configuration record.  It is written to / read from the socket as raw
 * bytes, hence the packed layout.
 */
typedef struct __attribute__ ((packed))
{
  uint32_t magic;		/* SOCK_TEST_CFG_CTRL_MAGIC */
  uint32_t test;		/* a sock_test_t value */
  uint32_t ctrl_handle;		/* ~0 until assigned */
  uint32_t num_test_sockets;
  uint32_t verbose;
  uint64_t rxbuf_size;
  uint64_t txbuf_size;
  uint64_t num_writes;
  uint64_t total_bytes;		/* num_writes * txbuf_size */
} sock_test_cfg_t;

/* Transfer counters plus the start/stop timestamps of a test run. */
typedef struct
{
  uint64_t rx_xacts;		/* read attempts */
  uint64_t rx_bytes;
  uint32_t rx_eagain;		/* reads that had to be retried */
  uint32_t rx_incomp;		/* reads that returned fewer bytes than asked */
  uint64_t tx_xacts;		/* write attempts */
  uint64_t tx_bytes;
  uint32_t tx_eagain;		/* writes that had to be retried */
  uint32_t tx_incomp;		/* writes that were only partially accepted */
  struct timespec start;
  struct timespec stop;
} sock_test_stats_t;

/* One socket (control or test) with its buffers, config and statistics. */
typedef struct
{
  int fd;
  uint32_t txbuf_size;
  char *txbuf;
  uint32_t rxbuf_size;
  char *rxbuf;
  sock_test_cfg_t cfg;
  sock_test_stats_t stats;
} sock_test_socket_t;

/*
 * Add every counter of `incr` to `accum`.  The start/stop timestamps of
 * `accum` are left untouched.
 */
static inline void
sock_test_stats_accumulate (sock_test_stats_t * accum,
			    sock_test_stats_t * incr)
{
  accum->rx_xacts += incr->rx_xacts;
  accum->rx_bytes += incr->rx_bytes;
  accum->rx_eagain += incr->rx_eagain;
  accum->rx_incomp += incr->rx_incomp;
  accum->tx_xacts += incr->tx_xacts;
  accum->tx_bytes += incr->tx_bytes;
  accum->tx_eagain += incr->tx_eagain;
  accum->tx_incomp += incr->tx_incomp;
}

/* Fill `cfg` with the default test configuration. */
static inline void
sock_test_cfg_init (sock_test_cfg_t *cfg)
{
  cfg->magic = SOCK_TEST_CFG_CTRL_MAGIC;
  cfg->test = SOCK_TEST_TYPE_NONE;
  cfg->ctrl_handle = ~0;
  cfg->num_test_sockets = 1;
  cfg->verbose = 0;
  cfg->rxbuf_size = SOCK_TEST_CFG_RXBUF_SIZE_DEF;
  cfg->num_writes = SOCK_TEST_CFG_NUM_WRITES_DEF;
  cfg->txbuf_size = SOCK_TEST_CFG_TXBUF_SIZE_DEF;
  cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
}

/*
 * Compare a received config against the expected one.
 * Returns non-zero when they match, 0 otherwise.
 */
static inline int
sock_test_cfg_verify (sock_test_cfg_t *cfg, sock_test_cfg_t *valid_cfg)
{
  /* Note: txbuf & rxbuf on server are the same buffer,
   *       so txbuf_size is not included in this check.
   */
  return ((cfg->magic == valid_cfg->magic)
	  && (cfg->test == valid_cfg->test)
	  && (cfg->verbose == valid_cfg->verbose)
	  && (cfg->rxbuf_size == valid_cfg->rxbuf_size)
	  && (cfg->num_writes == valid_cfg->num_writes)
	  && (cfg->total_bytes == valid_cfg->total_bytes));
}

/*
 * (Re)allocate *buf to the rx or tx buffer size recorded in `cfg`.
 *
 * On success *buf and *bufsize are updated and the size is written back to
 * the matching cfg field.  On failure an error is printed and *buf, *bufsize
 * and cfg are left as they were.
 *
 * NOTE(review): the 64-bit cfg size is narrowed to 32 bits here, so sizes
 * above UINT32_MAX are silently truncated.
 */
static inline void
sock_test_buf_alloc (sock_test_cfg_t *cfg, uint8_t is_rxbuf, uint8_t **buf,
		     uint32_t *bufsize)
{
  uint32_t alloc_size = is_rxbuf ? cfg->rxbuf_size : cfg->txbuf_size;
  uint8_t *lb = realloc (*buf, (size_t) alloc_size);

  if (lb)
    {
      if (is_rxbuf)
	cfg->rxbuf_size = *bufsize = alloc_size;
      else
	cfg->txbuf_size = *bufsize = alloc_size;

      *buf = lb;
    }
  else
    {
      int errno_val = errno;
      perror ("ERROR in sock_test_buf_alloc()");
      /* Both sizes are unsigned: print them with %u. */
      fprintf (stderr, "ERROR: Buffer allocation failed (errno = %d)!\n"
	       " Using buffer size %u instead of desired"
	       " size (%u)\n", errno_val, *bufsize, alloc_size);
    }
}

/* Allocate the tx and rx buffers of `socket` at the sizes in socket->cfg. */
static inline void
sock_test_socket_buf_alloc (sock_test_socket_t *socket)
{
  socket->rxbuf_size = socket->cfg.rxbuf_size;
  socket->txbuf_size = socket->cfg.txbuf_size;
  sock_test_buf_alloc (&socket->cfg, 0 /* is_rxbuf */ ,
		       (uint8_t **) &socket->txbuf, &socket->txbuf_size);
  sock_test_buf_alloc (&socket->cfg, 1 /* is_rxbuf */ ,
		       (uint8_t **) &socket->rxbuf, &socket->rxbuf_size);
}

/* Return a printable name for test type `t` ("Unknown" if unrecognised). */
static inline char *
sock_test_type_str (sock_test_t t)
{
  switch (t)
    {
    case SOCK_TEST_TYPE_NONE:
      return "NONE";

    case SOCK_TEST_TYPE_ECHO:
      return "ECHO";

    case SOCK_TEST_TYPE_UNI:
      return "UNI";

    case SOCK_TEST_TYPE_BI:
      return "BI";

    case SOCK_TEST_TYPE_EXIT:
      return "EXIT";

    default:
      return "Unknown";
    }
}

/*
 * Print `cfg` to stdout.  When `is_client` is non-zero each line is prefixed
 * with the command token that changes the corresponding setting.
 */
static inline void
sock_test_cfg_dump (sock_test_cfg_t * cfg, uint8_t is_client)
{
  const char *spc = " ";
  const char *test_tok = spc;

  if (is_client && (cfg->test == SOCK_TEST_TYPE_UNI))
    test_tok = "'" SOCK_TEST_TOKEN_RUN_UNI "'";
  else if (is_client && (cfg->test == SOCK_TEST_TYPE_BI))
    test_tok = "'" SOCK_TEST_TOKEN_RUN_BI "'";

  /* The 64-bit fields use the <inttypes.h> macros so the format matches
   * uint64_t on every ABI. */
  printf (" test config (%p):\n"
	  SOCK_TEST_SEPARATOR_STRING
	  " magic: 0x%08x\n"
	  "%-5s test: %s (%d)\n"
	  " ctrl handle: %d (0x%x)\n"
	  "%-5s num test sockets: %u (0x%08x)\n"
	  "%-5s verbose: %s (%d)\n"
	  "%-5s rxbuf size: %" PRIu64 " (0x%08" PRIx64 ")\n"
	  "%-5s txbuf size: %" PRIu64 " (0x%08" PRIx64 ")\n"
	  "%-5s num writes: %" PRIu64 " (0x%08" PRIx64 ")\n"
	  " client tx bytes: %" PRIu64 " (0x%08" PRIx64 ")\n"
	  SOCK_TEST_SEPARATOR_STRING,
	  (void *) cfg, cfg->magic,
	  test_tok,
	  sock_test_type_str ((sock_test_t) cfg->test), cfg->test,
	  cfg->ctrl_handle, cfg->ctrl_handle,
	  is_client ? "'" SOCK_TEST_TOKEN_NUM_TEST_SCKTS "'" : spc,
	  cfg->num_test_sockets, cfg->num_test_sockets,
	  is_client ? "'" SOCK_TEST_TOKEN_VERBOSE "'" : spc,
	  cfg->verbose ? "on" : "off", cfg->verbose,
	  is_client ? "'" SOCK_TEST_TOKEN_RXBUF_SIZE "'" : spc,
	  (uint64_t) cfg->rxbuf_size, (uint64_t) cfg->rxbuf_size,
	  is_client ? "'" SOCK_TEST_TOKEN_TXBUF_SIZE "'" : spc,
	  (uint64_t) cfg->txbuf_size, (uint64_t) cfg->txbuf_size,
	  is_client ? "'" SOCK_TEST_TOKEN_NUM_WRITES "'" : spc,
	  (uint64_t) cfg->num_writes, (uint64_t) cfg->num_writes,
	  (uint64_t) cfg->total_bytes, (uint64_t) cfg->total_bytes);
}

/*
 * Print a throughput summary for `stats` under the title `header`, followed
 * by the tx and/or rx counters as selected by `show_tx` / `show_rx`, and the
 * raw timestamps when `verbose` is non-zero.
 */
static inline void
sock_test_stats_dump (char * header, sock_test_stats_t * stats,
		      uint8_t show_rx, uint8_t show_tx,
		      uint8_t verbose)
{
  struct timespec diff;
  double duration, rate;
  uint64_t total_bytes;

  /* stop - start, borrowing one second when the nanoseconds go negative. */
  diff.tv_sec = stats->stop.tv_sec - stats->start.tv_sec;
  diff.tv_nsec = stats->stop.tv_nsec - stats->start.tv_nsec;
  if (diff.tv_nsec < 0)
    {
      diff.tv_sec -= 1;
      diff.tv_nsec += 1000000000;
    }
  duration = (double) diff.tv_sec + (1e-9 * diff.tv_nsec);

  total_bytes = stats->tx_bytes + stats->rx_bytes;
  /* A zero-length interval would otherwise print inf/nan. */
  rate = (duration > 0.0) ?
    ((double) total_bytes * 8 / duration / ONE_GIG) : 0.0;
  printf ("\n%s: Streamed %" PRIu64 " bytes\n"
	  " in %lf seconds (%lf Gbps %s-duplex)!\n",
	  header, total_bytes, duration, rate,
	  (show_rx && show_tx) ? "full" : "half");

  if (show_tx)
    {
      /* %p alone: the pointer conversion already emits its own prefix. */
      printf (SOCK_TEST_SEPARATOR_STRING
	      " tx stats (%p):\n"
	      SOCK_TEST_SEPARATOR_STRING
	      " writes: %" PRIu64 " (0x%08" PRIx64 ")\n"
	      " tx bytes: %" PRIu64 " (0x%08" PRIx64 ")\n"
	      " tx eagain: %u (0x%08x)\n"
	      " tx incomplete: %u (0x%08x)\n",
	      (void *) stats, stats->tx_xacts, stats->tx_xacts,
	      stats->tx_bytes, stats->tx_bytes,
	      stats->tx_eagain, stats->tx_eagain,
	      stats->tx_incomp, stats->tx_incomp);
    }
  if (show_rx)
    {
      printf (SOCK_TEST_SEPARATOR_STRING
	      " rx stats (%p):\n"
	      SOCK_TEST_SEPARATOR_STRING
	      " reads: %" PRIu64 " (0x%08" PRIx64 ")\n"
	      " rx bytes: %" PRIu64 " (0x%08" PRIx64 ")\n"
	      " rx eagain: %u (0x%08x)\n"
	      " rx incomplete: %u (0x%08x)\n",
	      (void *) stats, stats->rx_xacts, stats->rx_xacts,
	      stats->rx_bytes, stats->rx_bytes,
	      stats->rx_eagain, stats->rx_eagain,
	      stats->rx_incomp, stats->rx_incomp);
    }
  if (verbose)
    printf (" start.tv_sec: %ld\n"
	    " start.tv_nsec: %ld\n"
	    " stop.tv_sec: %ld\n"
	    " stop.tv_nsec: %ld\n",
	    (long) stats->start.tv_sec, (long) stats->start.tv_nsec,
	    (long) stats->stop.tv_sec, (long) stats->stop.tv_nsec);

  printf (SOCK_TEST_SEPARATOR_STRING);
}

/*
 * Read up to `nbytes` from `fd` into `buf`, retrying while the read returns
 * 0 bytes or fails with EAGAIN/EWOULDBLOCK.
 *
 * Returns the number of bytes read (> 0), or -1 with errno set after printing
 * an error.  `stats` may be NULL; otherwise its rx counters are updated.
 * With VCL_TEST defined, a negative vppcom return code is converted to
 * errno = -rc and a -1 result.
 *
 * NOTE(review): a 0-byte result is always retried.  With plain read() on a
 * stream socket 0 means the peer closed the connection, in which case this
 * loop would never terminate -- confirm whether callers rely on the retry.
 */
static inline int
sock_test_read (int fd, uint8_t *buf, uint32_t nbytes,
		sock_test_stats_t *stats)
{
  int rx_bytes, errno_val;
  int retry;

  do
    {
      if (stats)
	stats->rx_xacts++;
#ifdef VCL_TEST
      rx_bytes = vppcom_session_read (fd, buf, nbytes);

      if (rx_bytes < 0)
	{
	  errno = -rx_bytes;
	  rx_bytes = -1;
	}
#else
      rx_bytes = read (fd, buf, nbytes);
#endif
      retry = (rx_bytes == 0) ||
	((rx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK)));

      if (stats)
	{
	  if (retry)
	    stats->rx_eagain++;
	  /* Only a successful short read counts as incomplete. */
	  else if ((rx_bytes > 0) && ((uint32_t) rx_bytes < nbytes))
	    stats->rx_incomp++;
	}
    }
  while (retry);

  if (rx_bytes < 0)
    {
      errno_val = errno;
      perror ("ERROR in sock_test_read()");
      fprintf (stderr, "ERROR: socket read failed (errno = %d)!\n",
	       errno_val);
      errno = errno_val;
    }
  else if (stats)
    stats->rx_bytes += rx_bytes;

  return (rx_bytes);
}

/*
 * Write exactly `nbytes` from `buf` to `fd`, retrying on EAGAIN/EWOULDBLOCK
 * and continuing after partial writes from the first unsent byte.
 *
 * Returns `nbytes` on success, or -1 with errno set (after printing an error)
 * when the write fails for any other reason.  `stats` may be NULL; otherwise
 * its tx counters are updated, including any bytes that were sent before a
 * failure.  When `verbose` is non-zero a warning is printed for each partial
 * write.  With VCL_TEST defined, a negative vppcom return code is converted
 * to errno = -rc.
 */
static inline int
sock_test_write (int fd, uint8_t *buf, uint32_t nbytes,
		 sock_test_stats_t *stats, uint32_t verbose)
{
  uint32_t sent = 0;
  int rv, errno_val;

  do
    {
      if (stats)
	stats->tx_xacts++;
#ifdef VCL_TEST
      rv = vppcom_session_write (fd, buf + sent, nbytes - sent);
      if (rv < 0)
	{
	  errno = -rv;
	  rv = -1;
	}
#else
      /* Resume at the first unsent byte, not at the start of the buffer. */
      rv = write (fd, buf + sent, nbytes - sent);
#endif
      if (rv < 0)
	{
	  if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
	    {
	      if (stats)
		stats->tx_eagain++;
	      continue;
	    }

	  /* Hard failure: report it so callers' "< 0" checks fire. */
	  errno_val = errno;
	  perror ("ERROR in sock_test_write()");
	  fprintf (stderr, "ERROR: socket write failed (errno = %d)!\n",
		   errno_val);
	  if (stats)
	    stats->tx_bytes += sent;
	  errno = errno_val;
	  return -1;
	}
      sent += (uint32_t) rv;

      if (sent != nbytes)
	{
	  if (stats)
	    stats->tx_incomp++;
	  if (verbose)
	    {
	      printf ("WARNING: bytes written (%u) != bytes to write (%u)!\n",
		      sent, nbytes);
	    }
	}
    }
  while (sent != nbytes);

  if (stats)
    stats->tx_bytes += sent;

  return ((int) sent);
}

#endif /* __sock_test_h__ */

// diff --git a/src/vcl/sock_test_client.c b/src/vcl/sock_test_client.c new file mode 100644 index 00000000000..40ba043adae --- /dev/null +++ b/src/vcl/sock_test_client.c @@ -0,0 +1,1076 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <unistd.h> +#include <errno.h> +#include <stdlib.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <stdio.h> +#include <time.h> +#include <arpa/inet.h> +#include <vcl/sock_test.h> + +typedef struct +{ +#ifdef VCL_TEST + vppcom_endpt_t server_endpt; +#endif + struct sockaddr_in server_addr; + sock_test_socket_t ctrl_socket; + sock_test_socket_t *test_socket; + uint32_t num_test_sockets; + uint8_t dump_cfg; +} sock_client_main_t; + +sock_client_main_t sock_client_main; + + +static int +sock_test_cfg_sync (sock_test_socket_t * socket) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + sock_test_cfg_t *rl_cfg = (sock_test_cfg_t *) socket->rxbuf; + int rx_bytes, tx_bytes; + + if (socket->cfg.verbose) + sock_test_cfg_dump (&socket->cfg, 1 /* is_client */ ); + + tx_bytes = sock_test_write (socket->fd, (uint8_t *) & ctrl->cfg, + sizeof (ctrl->cfg), NULL, ctrl->cfg.verbose); + if (tx_bytes < 0) + { + fprintf (stderr, "ERROR: write test cfg failed (%d)!\n", tx_bytes); + return tx_bytes; + } + + rx_bytes = sock_test_read (socket->fd, (uint8_t *) socket->rxbuf, + sizeof (sock_test_cfg_t), NULL); + if (rx_bytes < 0) + return rx_bytes; + + if (rl_cfg->magic != SOCK_TEST_CFG_CTRL_MAGIC) + { + fprintf (stderr, "ERROR: Bad server reply cfg -- aborting!\n"); + return -1; + } + if (socket->cfg.verbose) + { + printf ("CLIENT (fd %d): Got config back from server.\n", socket->fd); + sock_test_cfg_dump (rl_cfg, 1 /* is_client */ ); + } + if ((rx_bytes != sizeof (sock_test_cfg_t)) + || !sock_test_cfg_verify (rl_cfg, &ctrl->cfg)) + { + fprintf (stderr, + "ERROR: Invalid config received from server -- aborting!\n"); + sock_test_cfg_dump (rl_cfg, 1 /* is_client */ ); + return -1; + } + ctrl->cfg.ctrl_handle = ((ctrl->cfg.ctrl_handle == ~0) ? 
+ rl_cfg->ctrl_handle : ctrl->cfg.ctrl_handle); + + return 0; +} + +static void +echo_test_client () +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + sock_test_socket_t *tsock; + int rx_bytes, tx_bytes, nbytes; + uint32_t i, n; + int rv; + int nfds = 0; + fd_set wr_fdset, rd_fdset; + fd_set _wfdset, *wfdset = &_wfdset; + fd_set _rfdset, *rfdset = &_rfdset; + + FD_ZERO (&wr_fdset); + FD_ZERO (&rd_fdset); + memset (&ctrl->stats, 0, sizeof (ctrl->stats)); + ctrl->cfg.total_bytes = nbytes = strlen (ctrl->txbuf) + 1; + for (n = 0; n != ctrl->cfg.num_test_sockets; n++) + { + tsock = &scm->test_socket[n]; + tsock->cfg = ctrl->cfg; + sock_test_socket_buf_alloc (tsock); + sock_test_cfg_sync (tsock); + + memcpy (tsock->txbuf, ctrl->txbuf, nbytes); + memset (&tsock->stats, 0, sizeof (tsock->stats)); + + FD_SET (tsock->fd, &wr_fdset); + FD_SET (tsock->fd, &rd_fdset); + nfds = ((tsock->fd + 1) > nfds) ? (tsock->fd + 1) : nfds; + } + + nfds++; + clock_gettime (CLOCK_REALTIME, &ctrl->stats.start); + while (n) + { + _wfdset = wr_fdset; + _rfdset = rd_fdset; + +#ifdef VCL_TEST + rv = vppcom_select (nfds, (uint64_t *) rfdset, (uint64_t *) wfdset, + NULL, 0); +#else + { + struct timeval timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + rv = select (nfds, rfdset, wfdset, NULL, &timeout); + } +#endif + if (rv < 0) + { + perror ("select()"); + fprintf (stderr, "\nERROR: select() failed -- aborting test!\n"); + return; + } + else if (rv == 0) + continue; + + for (i = 0; i < ctrl->cfg.num_test_sockets; i++) + { + tsock = &scm->test_socket[i]; + if (!((tsock->stats.stop.tv_sec == 0) && + (tsock->stats.stop.tv_nsec == 0))) + continue; + + if (FD_ISSET (tsock->fd, wfdset) && + (tsock->stats.tx_bytes < ctrl->cfg.total_bytes)) + + { + tx_bytes = + sock_test_write (tsock->fd, (uint8_t *) tsock->txbuf, nbytes, + &tsock->stats, ctrl->cfg.verbose); + if (tx_bytes < 0) + { + fprintf (stderr, "\nERROR: sock_test_write(%d) failed " + "-- 
aborting test!\n", tsock->fd); + return; + } + + printf ("CLIENT (fd %d): TX (%d bytes) - '%s'\n", + tsock->fd, tx_bytes, tsock->txbuf); + } + + if ((FD_ISSET (tsock->fd, rfdset)) && + (tsock->stats.rx_bytes < ctrl->cfg.total_bytes)) + { + rx_bytes = + sock_test_read (tsock->fd, (uint8_t *) tsock->rxbuf, + nbytes, &tsock->stats); + if (rx_bytes > 0) + { + printf ("CLIENT (fd %d): RX (%d bytes) - '%s'\n", + tsock->fd, rx_bytes, tsock->rxbuf); + + if (tsock->stats.rx_bytes != tsock->stats.tx_bytes) + printf + ("WARNING: bytes read (%lu) != bytes written (%lu)!\n", + tsock->stats.rx_bytes, tsock->stats.tx_bytes); + } + } + + if (tsock->stats.rx_bytes >= ctrl->cfg.total_bytes) + { + clock_gettime (CLOCK_REALTIME, &tsock->stats.stop); + n--; + } + } + } + clock_gettime (CLOCK_REALTIME, &ctrl->stats.stop); + + for (i = 0; i < ctrl->cfg.num_test_sockets; i++) + { + tsock = &scm->test_socket[i]; + tsock->stats.start = ctrl->stats.start; + + if (ctrl->cfg.verbose) + { + static char buf[64]; + + sprintf (buf, "CLIENT (fd %d) RESULTS", tsock->fd); + sock_test_stats_dump (buf, &tsock->stats, + 1 /* show_rx */ , 1 /* show tx */ , + ctrl->cfg.verbose); + } + + sock_test_stats_accumulate (&ctrl->stats, &tsock->stats); + } + + if (ctrl->cfg.verbose) + { + sock_test_stats_dump ("CLIENT RESULTS", &ctrl->stats, + 1 /* show_rx */ , 1 /* show tx */ , + ctrl->cfg.verbose); + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + + if (ctrl->cfg.verbose > 1) + { + printf (" ctrl socket info\n" + SOCK_TEST_SEPARATOR_STRING + " fd: %d (0x%08x)\n" + " rxbuf: %p\n" + " rxbuf size: %u (0x%08x)\n" + " txbuf: %p\n" + " txbuf size: %u (0x%08x)\n" + SOCK_TEST_SEPARATOR_STRING, + ctrl->fd, (uint32_t) ctrl->fd, + ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size, + ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size); + } + } +} + +static void +stream_test_client (sock_test_t test) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + sock_test_socket_t 
*tsock; + int tx_bytes; + uint32_t i, n; + int rv; + int nfds = 0; + fd_set wr_fdset, rd_fdset; + fd_set _wfdset, *wfdset = &_wfdset; + fd_set _rfdset, *rfdset = (test == SOCK_TEST_TYPE_BI) ? &_rfdset : 0; + + ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size; + ctrl->cfg.ctrl_handle = ~0; + + printf ("\n" SOCK_TEST_BANNER_STRING + "CLIENT (fd %d): %s-directional Stream Test!\n\n" + "CLIENT (fd %d): Sending config to server on ctrl socket...\n", + ctrl->fd, test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni", ctrl->fd); + + if (sock_test_cfg_sync (ctrl)) + { + fprintf (stderr, "ERROR: test cfg sync failed -- aborting!"); + return; + } + + FD_ZERO (&wr_fdset); + FD_ZERO (&rd_fdset); + memset (&ctrl->stats, 0, sizeof (ctrl->stats)); + for (n = 0; n != ctrl->cfg.num_test_sockets; n++) + { + tsock = &scm->test_socket[n]; + tsock->cfg = ctrl->cfg; + sock_test_socket_buf_alloc (tsock); + printf ("CLIENT (fd %d): Sending config to server on " + "test socket %d...\n", tsock->fd, n); + sock_test_cfg_sync (tsock); + + /* Fill payload with incrementing uint32's */ + for (i = 0; i < tsock->txbuf_size; i++) + tsock->txbuf[i] = i & 0xff; + + memset (&tsock->stats, 0, sizeof (tsock->stats)); + FD_SET (tsock->fd, &wr_fdset); + FD_SET (tsock->fd, &rd_fdset); + nfds = ((tsock->fd + 1) > nfds) ? 
(tsock->fd + 1) : nfds; + } + + nfds++; + clock_gettime (CLOCK_REALTIME, &ctrl->stats.start); + while (n) + { + _wfdset = wr_fdset; + _rfdset = rd_fdset; + +#ifdef VCL_TEST + rv = vppcom_select (nfds, (uint64_t *) rfdset, (uint64_t *) wfdset, + NULL, 0); +#else + { + struct timeval timeout; + timeout.tv_sec = 0; + timeout.tv_usec = 0; + rv = select (nfds, rfdset, wfdset, NULL, &timeout); + } +#endif + if (rv < 0) + { + perror ("select()"); + fprintf (stderr, "\nERROR: select() failed -- aborting test!\n"); + return; + } + else if (rv == 0) + continue; + + for (i = 0; i < ctrl->cfg.num_test_sockets; i++) + { + tsock = &scm->test_socket[i]; + if (!((tsock->stats.stop.tv_sec == 0) && + (tsock->stats.stop.tv_nsec == 0))) + continue; + + if (FD_ISSET (tsock->fd, wfdset) && + (tsock->stats.tx_bytes < ctrl->cfg.total_bytes)) + { + tx_bytes = + sock_test_write (tsock->fd, (uint8_t *) tsock->txbuf, + ctrl->cfg.txbuf_size, &tsock->stats, + ctrl->cfg.verbose); + if (tx_bytes < 0) + { + fprintf (stderr, "\nERROR: sock_test_write(%d) failed " + "-- aborting test!\n", tsock->fd); + return; + } + } + + if ((test == SOCK_TEST_TYPE_BI) && + FD_ISSET (tsock->fd, rfdset) && + (tsock->stats.rx_bytes < ctrl->cfg.total_bytes)) + { + (void) sock_test_read (tsock->fd, + (uint8_t *) tsock->rxbuf, + tsock->rxbuf_size, &tsock->stats); + } + + if (((test == SOCK_TEST_TYPE_UNI) && + (tsock->stats.tx_bytes >= ctrl->cfg.total_bytes)) || + ((test == SOCK_TEST_TYPE_BI) && + (tsock->stats.rx_bytes >= ctrl->cfg.total_bytes))) + { + clock_gettime (CLOCK_REALTIME, &tsock->stats.stop); + n--; + } + } + } + clock_gettime (CLOCK_REALTIME, &ctrl->stats.stop); + + printf ("CLIENT (fd %d): Sending config to server on ctrl socket...\n", + ctrl->fd); + + if (sock_test_cfg_sync (ctrl)) + { + fprintf (stderr, "ERROR: test cfg sync failed -- aborting!"); + return; + } + + for (i = 0; i < ctrl->cfg.num_test_sockets; i++) + { + tsock = &scm->test_socket[i]; + + if (ctrl->cfg.verbose) + { + static char buf[64]; + + 
sprintf (buf, "CLIENT (fd %d) RESULTS", tsock->fd); + sock_test_stats_dump (buf, &tsock->stats, + test == SOCK_TEST_TYPE_BI /* show_rx */ , + 1 /* show tx */ , ctrl->cfg.verbose); + } + + sock_test_stats_accumulate (&ctrl->stats, &tsock->stats); + } + + sock_test_stats_dump ("CLIENT RESULTS", &ctrl->stats, + test == SOCK_TEST_TYPE_BI /* show_rx */ , + 1 /* show tx */ , ctrl->cfg.verbose); + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + + if (ctrl->cfg.verbose) + { + printf (" ctrl socket info\n" + SOCK_TEST_SEPARATOR_STRING + " fd: %d (0x%08x)\n" + " rxbuf: %p\n" + " rxbuf size: %u (0x%08x)\n" + " txbuf: %p\n" + " txbuf size: %u (0x%08x)\n" + SOCK_TEST_SEPARATOR_STRING, + ctrl->fd, (uint32_t) ctrl->fd, + ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size, + ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size); + } + + ctrl->cfg.test = SOCK_TEST_TYPE_ECHO; + if (sock_test_cfg_sync (ctrl)) + fprintf (stderr, "ERROR: post-test cfg sync failed!"); + + printf ("CLIENT (fd %d): %s-directional Stream Test Complete!\n" + SOCK_TEST_BANNER_STRING "\n", ctrl->fd, + test == SOCK_TEST_TYPE_BI ? 
"Bi" : "Uni"); +} + +static void +exit_client (void) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + sock_test_socket_t *tsock; + int i; + + for (i = 0; i < ctrl->cfg.num_test_sockets; i++) + { + tsock = &scm->test_socket[i]; + tsock->cfg.test = SOCK_TEST_TYPE_EXIT; + + /* coverity[COPY_PASTE_ERROR] */ + if (ctrl->cfg.verbose) + { + printf ("\nCLIENT (fd %d): Sending exit cfg to server...\n", + tsock->fd); + sock_test_cfg_dump (&tsock->cfg, 1 /* is_client */ ); + } + (void) sock_test_write (tsock->fd, (uint8_t *) & tsock->cfg, + sizeof (tsock->cfg), &tsock->stats, + ctrl->cfg.verbose); + } + + ctrl->cfg.test = SOCK_TEST_TYPE_EXIT; + if (ctrl->cfg.verbose) + { + printf ("\nCLIENT (fd %d): Sending exit cfg to server...\n", ctrl->fd); + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + } + (void) sock_test_write (ctrl->fd, (uint8_t *) & ctrl->cfg, + sizeof (ctrl->cfg), &ctrl->stats, + ctrl->cfg.verbose); + printf ("\nCLIENT: So long and thanks for all the fish!\n\n"); + sleep (1); +} + +static int +sock_test_connect_test_sockets (uint32_t num_test_sockets) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + sock_test_socket_t *tsock; + int i, rv, errno_val; + + if (num_test_sockets < 1) + { + errno = EINVAL; + return -1; + } + + if (num_test_sockets < scm->num_test_sockets) + { + for (i = scm->num_test_sockets - 1; i >= num_test_sockets; i--) + { + tsock = &scm->test_socket[i]; +#ifdef VCL_TEST + vppcom_session_close (tsock->fd); +#else + close (tsock->fd); +#endif + free (tsock->txbuf); + free (tsock->rxbuf); + } + } + + else if (num_test_sockets > scm->num_test_sockets) + { + tsock = realloc (scm->test_socket, + sizeof (sock_test_socket_t) * num_test_sockets); + if (!tsock) + { + errno_val = errno; + perror ("ERROR in sock_test_connect_test_sockets()"); + fprintf (stderr, "ERROR: socket failed (errno = %d)!\n", errno_val); + return -1; + } + + if 
(!scm->test_socket) + memset (tsock, 0, sizeof (*tsock)); + + scm->test_socket = tsock; + for (i = scm->num_test_sockets; i < num_test_sockets; i++) + { + tsock = &scm->test_socket[i]; +#ifdef VCL_TEST + tsock->fd = + vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP, + 1 /* is_nonblocking */ ); + if (tsock->fd < 0) + { + errno = -tsock->fd; + tsock->fd = -1; + } +#else + tsock->fd = socket (AF_INET, SOCK_STREAM, 0); +#endif + if (tsock->fd < 0) + { + errno_val = errno; + perror ("ERROR in sock_test_connect_test_sockets()"); + fprintf (stderr, "ERROR: socket failed (errno = %d)!\n", + errno_val); + return tsock->fd; + } + +#ifdef VCL_TEST + rv = vppcom_session_connect (tsock->fd, &scm->server_endpt); +#else + rv = + connect (tsock->fd, (struct sockaddr *) &scm->server_addr, + sizeof (scm->server_addr)); +#endif + if (rv < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: connect failed (errno = %d)!\n", + errno_val); + } + tsock->cfg = ctrl->cfg; + sock_test_socket_buf_alloc (tsock); + sock_test_cfg_sync (tsock); + + printf ("CLIENT (fd %d): Test socket %d connected.\n", + tsock->fd, i); + } + } + + scm->num_test_sockets = num_test_sockets; + printf ("CLIENT: All sockets (%d) connected!\n", scm->num_test_sockets + 1); + return 0; +} + +static void +dump_help (void) +{ +#define INDENT "\n " + + printf ("Test configuration commands:" + INDENT SOCK_TEST_TOKEN_HELP + "\t\t\tDisplay help." + INDENT SOCK_TEST_TOKEN_EXIT + "\t\t\tExit test client & server." + INDENT SOCK_TEST_TOKEN_SHOW_CFG + "\t\t\tShow the current test cfg." + INDENT SOCK_TEST_TOKEN_RUN_UNI + "\t\t\tRun the Uni-directional test." + INDENT SOCK_TEST_TOKEN_RUN_BI + "\t\t\tRun the Bi-directional test." + INDENT SOCK_TEST_TOKEN_VERBOSE + "\t\t\tToggle verbose setting." + INDENT SOCK_TEST_TOKEN_RXBUF_SIZE + "<rxbuf size>\tRx buffer size (bytes)." + INDENT SOCK_TEST_TOKEN_TXBUF_SIZE + "<txbuf size>\tTx buffer size (bytes)." 
+ INDENT SOCK_TEST_TOKEN_NUM_WRITES + "<# of writes>\tNumber of txbuf writes to server." "\n"); +} + +static void +cfg_txbuf_size_set (void) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_TXBUF_SIZE); + uint64_t txbuf_size = strtoull ((const char *) p, NULL, 10); + + if (txbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN) + { + ctrl->cfg.txbuf_size = txbuf_size; + ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size; + sock_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ , + (uint8_t **) & ctrl->txbuf, &ctrl->txbuf_size); + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + } + else + fprintf (stderr, + "ERROR: Invalid txbuf size (%lu) < minimum buf size (%u)!\n", + txbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN); +} + +static void +cfg_num_writes_set (void) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_NUM_WRITES); + uint32_t num_writes = strtoul ((const char *) p, NULL, 10); + + if (num_writes > 0) + { + ctrl->cfg.num_writes = num_writes; + ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size; + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + } + else + { + fprintf (stderr, "ERROR: invalid num writes: %u\n", num_writes); + } +} + +static void +cfg_num_test_sockets_set (void) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_NUM_TEST_SCKTS); + uint32_t num_test_sockets = strtoul ((const char *) p, NULL, 10); + + if ((num_test_sockets > 0) && + (num_test_sockets <= SOCK_TEST_CFG_MAX_TEST_SCKTS)) + { + ctrl->cfg.num_test_sockets = num_test_sockets; + sock_test_connect_test_sockets (num_test_sockets); + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + } + else + { + fprintf (stderr, "ERROR: invalid num test sockets: %u, (%d max)\n", + 
num_test_sockets, SOCK_TEST_CFG_MAX_TEST_SCKTS); + } +} + +static void +cfg_rxbuf_size_set (void) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + char *p = ctrl->txbuf + strlen (SOCK_TEST_TOKEN_RXBUF_SIZE); + uint64_t rxbuf_size = strtoull ((const char *) p, NULL, 10); + + if (rxbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN) + { + ctrl->cfg.rxbuf_size = rxbuf_size; + sock_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ , + (uint8_t **) & ctrl->rxbuf, &ctrl->rxbuf_size); + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + } + else + fprintf (stderr, + "ERROR: Invalid rxbuf size (%lu) < minimum buf size (%u)!\n", + rxbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN); +} + +static void +cfg_verbose_toggle (void) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + + ctrl->cfg.verbose = ctrl->cfg.verbose ? 0 : 1; + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + +} + +static sock_test_t +parse_input () +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + sock_test_t rv = SOCK_TEST_TYPE_NONE; + + if (!strcmp (SOCK_TEST_TOKEN_EXIT, ctrl->txbuf)) + rv = SOCK_TEST_TYPE_EXIT; + + else if (!strcmp (SOCK_TEST_TOKEN_HELP, ctrl->txbuf)) + dump_help (); + + else if (!strcmp (SOCK_TEST_TOKEN_SHOW_CFG, ctrl->txbuf)) + scm->dump_cfg = 1; + + else if (!strcmp (SOCK_TEST_TOKEN_VERBOSE, ctrl->txbuf)) + cfg_verbose_toggle (); + + else if (!strncmp (SOCK_TEST_TOKEN_TXBUF_SIZE, ctrl->txbuf, + strlen (SOCK_TEST_TOKEN_TXBUF_SIZE))) + cfg_txbuf_size_set (); + + else if (!strncmp (SOCK_TEST_TOKEN_NUM_TEST_SCKTS, ctrl->txbuf, + strlen (SOCK_TEST_TOKEN_NUM_TEST_SCKTS))) + cfg_num_test_sockets_set (); + + else if (!strncmp (SOCK_TEST_TOKEN_NUM_WRITES, ctrl->txbuf, + strlen (SOCK_TEST_TOKEN_NUM_WRITES))) + cfg_num_writes_set (); + + else if (!strncmp (SOCK_TEST_TOKEN_RXBUF_SIZE, ctrl->txbuf, + strlen (SOCK_TEST_TOKEN_RXBUF_SIZE))) + cfg_rxbuf_size_set (); + + 
else if (!strncmp (SOCK_TEST_TOKEN_RUN_UNI, ctrl->txbuf, + strlen (SOCK_TEST_TOKEN_RUN_UNI))) + rv = ctrl->cfg.test = SOCK_TEST_TYPE_UNI; + + else if (!strncmp (SOCK_TEST_TOKEN_RUN_BI, ctrl->txbuf, + strlen (SOCK_TEST_TOKEN_RUN_BI))) + rv = ctrl->cfg.test = SOCK_TEST_TYPE_BI; + + else + rv = SOCK_TEST_TYPE_ECHO; + + return rv; +} + +void +print_usage_and_exit (void) +{ + fprintf (stderr, + "sock_test_client [OPTIONS] <ipaddr> <port>\n" + " OPTIONS\n" + " -h Print this message and exit.\n" + " -c Print test config before test.\n" + " -w <dir> Write test results to <dir>.\n" + " -X Exit after running test.\n" + " -E Run Echo test.\n" + " -N <num-writes> Test Cfg: number of writes.\n" + " -R <rxbuf-size> Test Cfg: rx buffer size.\n" + " -T <txbuf-size> Test Cfg: tx buffer size.\n" + " -U Run Uni-directional test.\n" + " -B Run Bi-directional test.\n" + " -V Verbose mode.\n"); + exit (1); +} + +int +main (int argc, char **argv) +{ + sock_client_main_t *scm = &sock_client_main; + sock_test_socket_t *ctrl = &scm->ctrl_socket; + int c, rv, errno_val; + sock_test_t post_test = SOCK_TEST_TYPE_NONE; + + sock_test_cfg_init (&ctrl->cfg); + sock_test_socket_buf_alloc (ctrl); + + opterr = 0; + while ((c = getopt (argc, argv, "chn:w:XE:I:N:R:T:UBV")) != -1) + switch (c) + { + case 'c': + scm->dump_cfg = 1; + break; + + case 's': + if (sscanf (optarg, "0x%x", &ctrl->cfg.num_test_sockets) != 1) + if (sscanf (optarg, "%u", &ctrl->cfg.num_test_sockets) != 1) + { + fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c); + print_usage_and_exit (); + } + if (!ctrl->cfg.num_test_sockets || + (ctrl->cfg.num_test_sockets > FD_SETSIZE)) + { + fprintf (stderr, "ERROR: Invalid number of sockets (%d)" + "specified for option -%c!\n" + " Valid range is 1 - %d\n", + ctrl->cfg.num_test_sockets, c, FD_SETSIZE); + print_usage_and_exit (); + } + break; + + case 'w': + fprintf (stderr, "Writing test results to files is TBD.\n"); + break; + + case 'X': + post_test = SOCK_TEST_TYPE_EXIT; + 
break; + + case 'E': + if (strlen (optarg) > ctrl->txbuf_size) + { + fprintf (stderr, + "ERROR: Option -%c value larger than txbuf size (%d)!\n", + optopt, ctrl->txbuf_size); + print_usage_and_exit (); + } + strcpy (ctrl->txbuf, optarg); + ctrl->cfg.test = SOCK_TEST_TYPE_ECHO; + break; + + case 'I': + if (sscanf (optarg, "0x%x", &ctrl->cfg.num_test_sockets) != 1) + if (sscanf (optarg, "%d", &ctrl->cfg.num_test_sockets) != 1) + { + fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c); + print_usage_and_exit (); + } + if (ctrl->cfg.num_test_sockets > SOCK_TEST_CFG_MAX_TEST_SCKTS) + { + fprintf (stderr, "ERROR: value greater than max number test" + " sockets (%d)!", SOCK_TEST_CFG_MAX_TEST_SCKTS); + print_usage_and_exit (); + } + break; + + case 'N': + if (sscanf (optarg, "0x%lx", &ctrl->cfg.num_writes) != 1) + if (sscanf (optarg, "%ld", &ctrl->cfg.num_writes) != 1) + { + fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c); + print_usage_and_exit (); + } + ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size; + break; + + case 'R': + if (sscanf (optarg, "0x%lx", &ctrl->cfg.rxbuf_size) != 1) + if (sscanf (optarg, "%ld", &ctrl->cfg.rxbuf_size) != 1) + { + fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c); + print_usage_and_exit (); + } + if (ctrl->cfg.rxbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN) + { + ctrl->rxbuf_size = ctrl->cfg.rxbuf_size; + sock_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ , + (uint8_t **) & ctrl->rxbuf, + &ctrl->rxbuf_size); + } + else + { + fprintf (stderr, + "ERROR: rxbuf size (%lu) less than minumum (%u)\n", + ctrl->cfg.rxbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN); + print_usage_and_exit (); + } + + break; + + case 'T': + if (sscanf (optarg, "0x%lx", &ctrl->cfg.txbuf_size) != 1) + if (sscanf (optarg, "%ld", &ctrl->cfg.txbuf_size) != 1) + { + fprintf (stderr, "ERROR: Invalid value for option -%c!\n", c); + print_usage_and_exit (); + } + if (ctrl->cfg.txbuf_size >= SOCK_TEST_CFG_BUF_SIZE_MIN) + { + 
ctrl->txbuf_size = ctrl->cfg.txbuf_size; + sock_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ , + (uint8_t **) & ctrl->txbuf, + &ctrl->txbuf_size); + ctrl->cfg.total_bytes = + ctrl->cfg.num_writes * ctrl->cfg.txbuf_size; + } + else + { + fprintf (stderr, + "ERROR: txbuf size (%lu) less than minumum (%u)!\n", + ctrl->cfg.txbuf_size, SOCK_TEST_CFG_BUF_SIZE_MIN); + print_usage_and_exit (); + } + break; + + case 'U': + ctrl->cfg.test = SOCK_TEST_TYPE_UNI; + break; + + case 'B': + ctrl->cfg.test = SOCK_TEST_TYPE_BI; + break; + + case 'V': + ctrl->cfg.verbose = 1; + break; + + case '?': + switch (optopt) + { + case 'E': + case 'I': + case 'N': + case 'R': + case 'T': + case 'w': + fprintf (stderr, "ERROR: Option -%c requires an argument.\n", + optopt); + break; + + default: + if (isprint (optopt)) + fprintf (stderr, "ERROR: Unknown option `-%c'.\n", optopt); + else + fprintf (stderr, "ERROR: Unknown option character `\\x%x'.\n", + optopt); + } + /* fall thru */ + case 'h': + default: + print_usage_and_exit (); + } + + if (argc < (optind + 2)) + { + fprintf (stderr, "ERROR: Insufficient number of arguments!\n"); + print_usage_and_exit (); + } + +#ifdef VCL_TEST + ctrl->fd = vppcom_app_create ("vcl_test_client"); + if (ctrl->fd < 0) + { + errno = -ctrl->fd; + ctrl->fd = -1; + } + else + { + ctrl->fd = vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP, + 0 /* is_nonblocking */ ); + if (ctrl->fd < 0) + { + errno = -ctrl->fd; + ctrl->fd = -1; + } + } +#else + ctrl->fd = socket (AF_INET, SOCK_STREAM, 0); +#endif + + if (ctrl->fd < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: socket failed (errno = %d)!\n", errno_val); + return ctrl->fd; + } + + memset (&scm->server_addr, 0, sizeof (scm->server_addr)); + + scm->server_addr.sin_family = AF_INET; + inet_pton (AF_INET, argv[optind++], &(scm->server_addr.sin_addr)); + scm->server_addr.sin_port = htons (atoi (argv[optind])); + +#ifdef VCL_TEST + scm->server_endpt.vrf = 
VPPCOM_VRF_DEFAULT; + scm->server_endpt.is_ip4 = (scm->server_addr.sin_family == AF_INET); + scm->server_endpt.ip = (uint8_t *) & scm->server_addr.sin_addr; + scm->server_endpt.port = (uint16_t) scm->server_addr.sin_port; +#endif + + do + { + printf ("\nCLIENT: Connecting to server...\n"); + +#ifdef VCL_TEST + rv = vppcom_session_connect (ctrl->fd, &scm->server_endpt); +#else + rv = + connect (ctrl->fd, (struct sockaddr *) &scm->server_addr, + sizeof (scm->server_addr)); +#endif + if (rv < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: connect failed (errno = %d)!\n", + errno_val); + } + + sock_test_cfg_sync (ctrl); + printf ("CLIENT (fd %d): Control socket connected.\n", ctrl->fd); + } + while (rv < 0); + + sock_test_connect_test_sockets (ctrl->cfg.num_test_sockets); + + while (ctrl->cfg.test != SOCK_TEST_TYPE_EXIT) + { + if (scm->dump_cfg) + { + sock_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ ); + scm->dump_cfg = 0; + } + + switch (ctrl->cfg.test) + { + case SOCK_TEST_TYPE_ECHO: + echo_test_client (); + break; + + case SOCK_TEST_TYPE_UNI: + case SOCK_TEST_TYPE_BI: + stream_test_client (ctrl->cfg.test); + break; + + case SOCK_TEST_TYPE_EXIT: + continue; + + case SOCK_TEST_TYPE_NONE: + default: + break; + } + switch (post_test) + { + case SOCK_TEST_TYPE_EXIT: + switch (ctrl->cfg.test) + { + case SOCK_TEST_TYPE_EXIT: + case SOCK_TEST_TYPE_UNI: + case SOCK_TEST_TYPE_BI: + case SOCK_TEST_TYPE_ECHO: + ctrl->cfg.test = SOCK_TEST_TYPE_EXIT; + continue; + + case SOCK_TEST_TYPE_NONE: + default: + break; + } + break; + + case SOCK_TEST_TYPE_NONE: + case SOCK_TEST_TYPE_ECHO: + case SOCK_TEST_TYPE_UNI: + case SOCK_TEST_TYPE_BI: + default: + break; + } + + memset (ctrl->txbuf, 0, ctrl->txbuf_size); + memset (ctrl->rxbuf, 0, ctrl->rxbuf_size); + + printf ("\nType some characters and hit <return>\n" + "('" SOCK_TEST_TOKEN_HELP "' for help): "); + + if (fgets (ctrl->txbuf, ctrl->txbuf_size, stdin) != NULL) + { + if (strlen (ctrl->txbuf) == 
1) + { + printf ("\nCLIENT: Nothing to send! Please try again...\n"); + continue; + } + ctrl->txbuf[strlen (ctrl->txbuf) - 1] = 0; // chomp the newline. + + /* Parse input for keywords */ + ctrl->cfg.test = parse_input (); + } + } + + exit_client (); +#ifdef VCL_TEST + vppcom_session_close (ctrl->fd); + vppcom_app_destroy (); +#else + close (ctrl->fd); +#endif + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/sock_test_server.c b/src/vcl/sock_test_server.c new file mode 100644 index 00000000000..753a7da790c --- /dev/null +++ b/src/vcl/sock_test_server.c @@ -0,0 +1,790 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <ctype.h> +#include <vcl/sock_test.h> +#include <sys/stat.h> +#include <fcntl.h> + +#define SOCK_SERVER_USE_EPOLL 1 +#define VPPCOM_SESSION_ATTR_UNIT_TEST 0 + +#if SOCK_SERVER_USE_EPOLL +#include <sys/epoll.h> +#endif + +#ifdef VCL_TEST +#if VPPCOM_SESSION_ATTR_UNIT_TEST +#define BUFLEN sizeof (uint64_t) * 16 +uint64_t buffer[16]; +uint32_t buflen = BUFLEN; +uint32_t *flags = (uint32_t *) buffer; +#endif +#endif + +typedef struct +{ + uint8_t is_alloc; + int fd; + uint8_t *buf; + uint32_t buf_size; + sock_test_cfg_t cfg; + sock_test_stats_t stats; +#ifdef VCL_TEST + vppcom_endpt_t endpt; + uint8_t ip[16]; +#endif +} sock_server_conn_t; + +#define SOCK_SERVER_MAX_TEST_CONN 10 +#define SOCK_SERVER_MAX_EPOLL_EVENTS 10 +typedef struct +{ + int listen_fd; +#if SOCK_SERVER_USE_EPOLL + int epfd; + struct epoll_event listen_ev; + struct epoll_event wait_events[SOCK_SERVER_MAX_EPOLL_EVENTS]; +#endif + size_t num_conn; + size_t conn_pool_size; + sock_server_conn_t *conn_pool; + int nfds; + fd_set rd_fdset; + fd_set wr_fdset; + struct timeval timeout; +} sock_server_main_t; + +sock_server_main_t sock_server_main; + +#if ! 
SOCK_SERVER_USE_EPOLL +static inline int +get_nfds (void) +{ + sock_server_main_t *ssm = &sock_server_main; + int i, nfds; + + for (nfds = i = 0; i < FD_SETSIZE; i++) + { + if (FD_ISSET (i, &ssm->rd_fdset) || FD_ISSET (i, &ssm->wr_fdset)) + nfds = i + 1; + } + return nfds; +} + +static inline void +conn_fdset_set (sock_server_conn_t * conn, fd_set * fdset) +{ + sock_server_main_t *ssm = &sock_server_main; + + FD_SET (conn->fd, fdset); + ssm->nfds = get_nfds (); +} + +static inline void +conn_fdset_clr (sock_server_conn_t * conn, fd_set * fdset) +{ + sock_server_main_t *ssm = &sock_server_main; + + FD_CLR (conn->fd, fdset); + ssm->nfds = get_nfds (); +} +#endif + +static inline void +conn_pool_expand (size_t expand_size) +{ + sock_server_main_t *ssm = &sock_server_main; + sock_server_conn_t *conn_pool; + size_t new_size = ssm->conn_pool_size + expand_size; + int i; + + conn_pool = realloc (ssm->conn_pool, new_size * sizeof (*ssm->conn_pool)); + if (conn_pool) + { + for (i = ssm->conn_pool_size; i < new_size; i++) + { + sock_server_conn_t *conn = &conn_pool[i]; + memset (conn, 0, sizeof (*conn)); + sock_test_cfg_init (&conn->cfg); + sock_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ , + &conn->buf, &conn->buf_size); + conn->cfg.txbuf_size = conn->cfg.rxbuf_size; + } + + ssm->conn_pool = conn_pool; + ssm->conn_pool_size = new_size; + } + else + { + int errno_val = errno; + perror ("ERROR in conn_pool_expand()"); + fprintf (stderr, "ERROR: Memory allocation failed (errno = %d)!\n", + errno_val); + } +} + +static inline sock_server_conn_t * +conn_pool_alloc (void) +{ + sock_server_main_t *ssm = &sock_server_main; + int i; + + for (i = 0; i < ssm->conn_pool_size; i++) + { + if (!ssm->conn_pool[i].is_alloc) + { +#ifdef VCL_TEST + ssm->conn_pool[i].endpt.ip = ssm->conn_pool[i].ip; +#endif + ssm->conn_pool[i].is_alloc = 1; + return (&ssm->conn_pool[i]); + } + } + + return 0; +} + +static inline void +conn_pool_free (sock_server_conn_t * conn) +{ +#if ! 
SOCK_SERVER_USE_EPOLL + sock_server_main_t *ssm = &sock_server_main; + + conn_fdset_clr (conn, &ssm->rd_fdset); + conn_fdset_clr (conn, &ssm->wr_fdset); +#endif + conn->fd = 0; + conn->is_alloc = 0; +} + +static inline void +sync_config_and_reply (sock_server_conn_t * conn, sock_test_cfg_t * rx_cfg) +{ + conn->cfg = *rx_cfg; + sock_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ , + &conn->buf, &conn->buf_size); + conn->cfg.txbuf_size = conn->cfg.rxbuf_size; + + if (conn->cfg.verbose) + { + printf ("\nSERVER (fd %d): Replying to cfg message!\n", conn->fd); + sock_test_cfg_dump (&conn->cfg, 0 /* is_client */ ); + } + (void) sock_test_write (conn->fd, (uint8_t *) & conn->cfg, + sizeof (conn->cfg), NULL, conn->cfg.verbose); +} + +static void +stream_test_server_start_stop (sock_server_conn_t * conn, + sock_test_cfg_t * rx_cfg) +{ + sock_server_main_t *ssm = &sock_server_main; + int client_fd = conn->fd; + sock_test_t test = rx_cfg->test; + + if (rx_cfg->ctrl_handle == conn->fd) + { + int i; + clock_gettime (CLOCK_REALTIME, &conn->stats.stop); + + for (i = 0; i < ssm->conn_pool_size; i++) + { + sock_server_conn_t *tc = &ssm->conn_pool[i]; + + if (tc->cfg.ctrl_handle == conn->fd) + { + sock_test_stats_accumulate (&conn->stats, &tc->stats); + + if (conn->cfg.verbose) + { + static char buf[64]; + + sprintf (buf, "SERVER (fd %d) RESULTS", tc->fd); + sock_test_stats_dump (buf, &tc->stats, 1 /* show_rx */ , + test == SOCK_TEST_TYPE_BI + /* show tx */ , + conn->cfg.verbose); + } + } + } + + sock_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */ , + (test == SOCK_TEST_TYPE_BI) /* show_tx */ , + conn->cfg.verbose); + sock_test_cfg_dump (&conn->cfg, 0 /* is_client */ ); + if (conn->cfg.verbose) + { + printf (" sock server main\n" + SOCK_TEST_SEPARATOR_STRING + " buf: %p\n" + " buf size: %u (0x%08x)\n" + SOCK_TEST_SEPARATOR_STRING, + conn->buf, conn->buf_size, conn->buf_size); + } + + sync_config_and_reply (conn, rx_cfg); + printf ("\nSERVER (fd %d): %s-directional 
Stream Test Complete!\n" + SOCK_TEST_BANNER_STRING "\n", conn->fd, + test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni"); + } + else + { + printf ("\n" SOCK_TEST_BANNER_STRING + "SERVER (fd %d): %s-directional Stream Test!\n" + " Sending client the test cfg to start streaming data...\n", + client_fd, test == SOCK_TEST_TYPE_BI ? "Bi" : "Uni"); + + rx_cfg->ctrl_handle = (rx_cfg->ctrl_handle == ~0) ? conn->fd : + rx_cfg->ctrl_handle; + + sync_config_and_reply (conn, rx_cfg); + + /* read the 1st chunk, record start time */ + memset (&conn->stats, 0, sizeof (conn->stats)); + clock_gettime (CLOCK_REALTIME, &conn->stats.start); + } +} + + +static inline void +stream_test_server (sock_server_conn_t * conn, int rx_bytes) +{ + int client_fd = conn->fd; + sock_test_t test = conn->cfg.test; + + if (test == SOCK_TEST_TYPE_BI) + (void) sock_test_write (client_fd, conn->buf, rx_bytes, &conn->stats, + conn->cfg.verbose); + + if (conn->stats.rx_bytes >= conn->cfg.total_bytes) + { + clock_gettime (CLOCK_REALTIME, &conn->stats.stop); + } +} + +static inline void +new_client (void) +{ + sock_server_main_t *ssm = &sock_server_main; + int client_fd; + sock_server_conn_t *conn; + + if (ssm->conn_pool_size < (ssm->num_conn + SOCK_SERVER_MAX_TEST_CONN + 1)) + conn_pool_expand (SOCK_SERVER_MAX_TEST_CONN + 1); + + conn = conn_pool_alloc (); + if (!conn) + { + fprintf (stderr, "\nERROR: No free connections!\n"); + return; + } + +#ifdef VCL_TEST + client_fd = vppcom_session_accept (ssm->listen_fd, &conn->endpt, + -1.0 /* wait forever */ ); + if (client_fd < 0) + errno = -client_fd; +#else + client_fd = accept (ssm->listen_fd, (struct sockaddr *) NULL, NULL); +#endif + if (client_fd < 0) + { + int errno_val; + errno_val = errno; + perror ("ERROR in new_client()"); + fprintf (stderr, "ERROR: accept failed (errno = %d)!\n", errno_val); + } + + printf ("SERVER: Got a connection -- fd = %d (0x%08x)!\n", + client_fd, client_fd); + + conn->fd = client_fd; + +#if ! 
SOCK_SERVER_USE_EPOLL + conn_fdset_set (conn, &ssm->rd_fdset); + ssm->nfds++; +#else + { + struct epoll_event ev; + int rv; + + ev.events = EPOLLIN; + ev.data.u64 = conn - ssm->conn_pool; +#ifdef VCL_TEST + rv = vppcom_epoll_ctl (ssm->epfd, EPOLL_CTL_ADD, client_fd, &ev); + if (rv) + errno = -rv; +#else + rv = epoll_ctl (ssm->epfd, EPOLL_CTL_ADD, client_fd, &ev); +#endif + if (rv < 0) + { + int errno_val; + errno_val = errno; + perror ("ERROR in new_client()"); + fprintf (stderr, "ERROR: epoll_ctl failed (errno = %d)!\n", + errno_val); + } + else + ssm->nfds++; + } +#endif +} + +int +main (int argc, char **argv) +{ + sock_server_main_t *ssm = &sock_server_main; + int client_fd, rv, main_rv = 0; + int tx_bytes, rx_bytes, nbytes; + sock_server_conn_t *conn; + sock_test_cfg_t *rx_cfg; + uint32_t xtra = 0; + uint64_t xtra_bytes = 0; + struct sockaddr_in servaddr; + int errno_val; + int v, i; + uint16_t port = SOCK_TEST_SERVER_PORT; +#if ! SOCK_SERVER_USE_EPOLL + fd_set _rfdset, *rfdset = &_rfdset; +#endif +#ifdef VCL_TEST + vppcom_endpt_t endpt; +#else +#if ! 
SOCK_SERVER_USE_EPOLL + fd_set _wfdset, *wfdset = &_wfdset; +#endif +#endif + + if ((argc == 2) && (sscanf (argv[1], "%d", &v) == 1)) + port = (uint16_t) v; + + conn_pool_expand (SOCK_SERVER_MAX_TEST_CONN + 1); + +#ifdef VCL_TEST + rv = vppcom_app_create ("vcl_test_server"); + if (rv) + { + errno = -rv; + ssm->listen_fd = -1; + } + else + { + ssm->listen_fd = + vppcom_session_create (VPPCOM_VRF_DEFAULT, VPPCOM_PROTO_TCP, + 0 /* is_nonblocking */ ); + } +#else + ssm->listen_fd = socket (AF_INET, SOCK_STREAM, 0); +#endif + if (ssm->listen_fd < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: socket() failed (errno = %d)!\n", errno_val); + return ssm->listen_fd; + } + + memset (&servaddr, 0, sizeof (servaddr)); + + servaddr.sin_family = AF_INET; + servaddr.sin_addr.s_addr = htonl (INADDR_ANY); + servaddr.sin_port = htons (port); + +#ifdef VCL_TEST + endpt.vrf = VPPCOM_VRF_DEFAULT; + endpt.is_ip4 = (servaddr.sin_family == AF_INET); + endpt.ip = (uint8_t *) & servaddr.sin_addr; + endpt.port = (uint16_t) servaddr.sin_port; + + rv = vppcom_session_bind (ssm->listen_fd, &endpt); + if (rv) + { + errno = -rv; + rv = -1; + } + +#if VPPCOM_SESSION_ATTR_UNIT_TEST + buflen = BUFLEN; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_GET_FLAGS, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_FLAGS0: Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); + buflen = BUFLEN; + *flags = O_RDWR | O_NONBLOCK; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_SET_FLAGS, + buffer, &buflen) != VPPCOM_OK) + printf ("\nSET_FLAGS1: Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); + buflen = BUFLEN; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_GET_FLAGS, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_FLAGS1:Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); + *flags = O_RDWR; + buflen = BUFLEN; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_SET_FLAGS, + buffer, &buflen) != VPPCOM_OK) + printf ("\nSET_FLAGS2: Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! 
!\n"); + buflen = BUFLEN; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_GET_FLAGS, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_FLAGS2:Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); + + buflen = BUFLEN; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_GET_PEER_ADDR, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_PEER_ADDR: Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); + buflen = BUFLEN; + if (vppcom_session_attr (ssm->listen_fd, VPPCOM_ATTR_GET_LCL_ADDR, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_LCL_ADDR: Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); +#endif +#else + rv = + bind (ssm->listen_fd, (struct sockaddr *) &servaddr, sizeof (servaddr)); +#endif + if (rv < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: bind failed (errno = %d)!\n", errno_val); + return rv; + } + +#ifdef VCL_TEST + rv = vppcom_session_listen (ssm->listen_fd, 10); + if (rv) + { + errno = -rv; + rv = -1; + } +#else + rv = listen (ssm->listen_fd, 10); +#endif + if (rv < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: listen failed (errno = %d)!\n", errno_val); + return rv; + } + + printf ("\nSERVER: Waiting for a client to connect on port %d...\n", port); + +#if ! 
SOCK_SERVER_USE_EPOLL + + FD_ZERO (&ssm->wr_fdset); + FD_ZERO (&ssm->rd_fdset); + + FD_SET (ssm->listen_fd, &ssm->rd_fdset); + ssm->nfds = ssm->listen_fd + 1; + +#else +#ifdef VCL_TEST + ssm->epfd = vppcom_epoll_create (); + if (ssm->epfd < 0) + errno = -ssm->epfd; +#else + ssm->epfd = epoll_create (1); +#endif + if (ssm->epfd < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: epoll_create failed (errno = %d)!\n", + errno_val); + return ssm->epfd; + } + + ssm->listen_ev.events = EPOLLIN; + ssm->listen_ev.data.u32 = ~0; +#ifdef VCL_TEST + rv = vppcom_epoll_ctl (ssm->epfd, EPOLL_CTL_ADD, ssm->listen_fd, + &ssm->listen_ev); + if (rv < 0) + errno = -rv; +#else + rv = epoll_ctl (ssm->epfd, EPOLL_CTL_ADD, ssm->listen_fd, &ssm->listen_ev); +#endif + if (rv < 0) + { + errno_val = errno; + perror ("ERROR in main()"); + fprintf (stderr, "ERROR: epoll_ctl failed (errno = %d)!\n", errno_val); + return rv; + } +#endif + + while (1) + { +#if ! SOCK_SERVER_USE_EPOLL + _rfdset = ssm->rd_fdset; + +#ifdef VCL_TEST + rv = vppcom_select (ssm->nfds, (uint64_t *) rfdset, NULL, NULL, 0); +#else + { + struct timeval timeout; + timeout = ssm->timeout; + _wfdset = ssm->wr_fdset; + rv = select (ssm->nfds, rfdset, wfdset, NULL, &timeout); + } +#endif + if (rv < 0) + { + perror ("select()"); + fprintf (stderr, "\nERROR: select() failed -- aborting!\n"); + main_rv = -1; + goto done; + } + else if (rv == 0) + continue; + + if (FD_ISSET (ssm->listen_fd, rfdset)) + new_client (); + + for (i = 0; i < ssm->conn_pool_size; i++) + { + if (!ssm->conn_pool[i].is_alloc) + continue; + + conn = &ssm->conn_pool[i]; +#else + int num_ev; +#ifdef VCL_TEST + num_ev = vppcom_epoll_wait (ssm->epfd, ssm->wait_events, + SOCK_SERVER_MAX_EPOLL_EVENTS, 60.0); + if (num_ev < 0) + errno = -num_ev; +#else + num_ev = epoll_wait (ssm->epfd, ssm->wait_events, + SOCK_SERVER_MAX_EPOLL_EVENTS, 60000); +#endif + if (num_ev < 0) + { + perror ("epoll_wait()"); + fprintf (stderr, "\nERROR: 
epoll_wait() failed -- aborting!\n"); + main_rv = -1; + goto done; + } + if (num_ev == 0) + { + fprintf (stderr, "\nepoll_wait() timeout!\n"); + continue; + } + for (i = 0; i < num_ev; i++) + { + if (ssm->wait_events[i].data.u32 == ~0) + { + new_client (); + continue; + } + conn = &ssm->conn_pool[ssm->wait_events[i].data.u32]; +#endif + client_fd = conn->fd; + +#if ! SOCK_SERVER_USE_EPOLL + if (FD_ISSET (client_fd, rfdset)) +#else + if (EPOLLIN & ssm->wait_events[i].events) +#endif + { +#ifdef VCL_TEST +#if VPPCOM_SESSION_ATTR_UNIT_TEST + buflen = BUFLEN; + if (vppcom_session_attr (client_fd, VPPCOM_ATTR_GET_NREAD, + buffer, &buflen) < VPPCOM_OK) + printf ("\nNREAD: Oh no, Mr. Biiiiiiiiiiiilllllll ! ! ! !\n"); + if (vppcom_session_attr (client_fd, + VPPCOM_ATTR_GET_PEER_ADDR, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_PEER_ADDR: Oh no, Mr. " + "Biiiiiiiiiiiilllllll ! ! ! !\n"); + buflen = BUFLEN; + if (vppcom_session_attr (client_fd, VPPCOM_ATTR_GET_LCL_ADDR, + buffer, &buflen) != VPPCOM_OK) + printf ("\nGET_LCL_ADDR: Oh no, Mr. " + "Biiiiiiiiiiiilllllll ! ! ! 
!\n"); +#endif +#endif + rx_bytes = sock_test_read (client_fd, conn->buf, + conn->buf_size, &conn->stats); + if (rx_bytes > 0) + { + rx_cfg = (sock_test_cfg_t *) conn->buf; + if (rx_cfg->magic == SOCK_TEST_CFG_CTRL_MAGIC) + { + if (rx_cfg->verbose) + { + printf ("SERVER (fd %d): Received a cfg message!\n", + client_fd); + sock_test_cfg_dump (rx_cfg, 0 /* is_client */ ); + } + + if (rx_bytes != sizeof (*rx_cfg)) + { + printf ("SERVER (fd %d): Invalid cfg message " + "size (%d)!\n Should be %lu bytes.\n", + client_fd, rx_bytes, sizeof (*rx_cfg)); + conn->cfg.rxbuf_size = 0; + conn->cfg.num_writes = 0; + if (conn->cfg.verbose) + { + printf ("SERVER (fd %d): Replying to " + "cfg message!\n", client_fd); + sock_test_cfg_dump (rx_cfg, 0 /* is_client */ ); + } + sock_test_write (client_fd, (uint8_t *) & conn->cfg, + sizeof (conn->cfg), NULL, + conn->cfg.verbose); + continue; + } + + switch (rx_cfg->test) + { + case SOCK_TEST_TYPE_NONE: + case SOCK_TEST_TYPE_ECHO: + sync_config_and_reply (conn, rx_cfg); + break; + + case SOCK_TEST_TYPE_BI: + case SOCK_TEST_TYPE_UNI: + stream_test_server_start_stop (conn, rx_cfg); + break; + + case SOCK_TEST_TYPE_EXIT: + printf ("SERVER: Have a great day, " + "connection %d!\n", client_fd); +#ifdef VCL_TEST + vppcom_session_close (client_fd); +#else + close (client_fd); +#endif + conn_pool_free (conn); +#if ! 
SOCK_SERVER_USE_EPOLL + if (ssm->nfds == (ssm->listen_fd + 1)) +#else + ssm->nfds--; + if (!ssm->nfds) +#endif + { + printf ("SERVER: All client connections " + "closed.\n\nSERVER: " + "May the force be with you!\n\n"); + goto done; + } + break; + + default: + fprintf (stderr, "ERROR: Unknown test type!\n"); + sock_test_cfg_dump (rx_cfg, 0 /* is_client */ ); + break; + } + continue; + } + + else if ((conn->cfg.test == SOCK_TEST_TYPE_UNI) || + (conn->cfg.test == SOCK_TEST_TYPE_BI)) + { + stream_test_server (conn, rx_bytes); + continue; + } + + else if (isascii (conn->buf[0])) + { + // If it looks vaguely like a string, make sure it's terminated + ((char *) conn->buf)[rx_bytes < + conn->buf_size ? rx_bytes : + conn->buf_size - 1] = 0; + printf ("SERVER (fd %d): RX (%d bytes) - '%s'\n", + conn->fd, rx_bytes, conn->buf); + } + } + else // rx_bytes < 0 + { + if (errno == ECONNRESET) + { + printf ("\nSERVER: Connection reset by remote peer.\n" + " Y'all have a great day now!\n\n"); + break; + } + else + continue; + } + + if (isascii (conn->buf[0])) + { + // If it looks vaguely like a string, make sure it's terminated + ((char *) conn->buf)[rx_bytes < + conn->buf_size ? rx_bytes : + conn->buf_size - 1] = 0; + if (xtra) + fprintf (stderr, + "ERROR: FIFO not drained in previous test!\n" + " extra chunks %u (0x%x)\n" + " extra bytes %lu (0x%lx)\n", + xtra, xtra, xtra_bytes, xtra_bytes); + + xtra = 0; + xtra_bytes = 0; + + if (conn->cfg.verbose) + printf ("SERVER (fd %d): Echoing back\n", client_fd); + + nbytes = strlen ((const char *) conn->buf) + 1; + + tx_bytes = sock_test_write (client_fd, conn->buf, + nbytes, &conn->stats, + conn->cfg.verbose); + if (tx_bytes >= 0) + printf ("SERVER (fd %d): TX (%d bytes) - '%s'\n", + conn->fd, tx_bytes, conn->buf); + } + + else // Extraneous read data from non-echo tests??? 
+ { + xtra++; + xtra_bytes += rx_bytes; + } + } + } + } + +done: +#ifdef VCL_TEST + vppcom_session_close (ssm->listen_fd); + vppcom_app_destroy (); +#else + close (ssm->listen_fd); +#endif + if (ssm->conn_pool) + free (ssm->conn_pool); + + return main_rv; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcl_test_client.c b/src/vcl/vcl_test_client.c new file mode 100644 index 00000000000..e1a4c6b74d4 --- /dev/null +++ b/src/vcl/vcl_test_client.c @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define VCL_TEST + +#include <vcl/vppcom.h> +#include <vcl/sock_test_client.c> + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcl_test_server.c b/src/vcl/vcl_test_server.c new file mode 100644 index 00000000000..e91d2ecd7c8 --- /dev/null +++ b/src/vcl/vcl_test_server.c @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define VCL_TEST + +#include <vcl/vppcom.h> +#include <vcl/sock_test_server.c> + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom.c b/src/vcl/vcom.c new file mode 100644 index 00000000000..6d98fdb75c0 --- /dev/null +++ b/src/vcl/vcom.c @@ -0,0 +1,3334 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include <dlfcn.h> +#include <pthread.h> +#include <time.h> +#include <stdarg.h> +#include <sys/resource.h> + +#include <vcl/vcom_socket_wrapper.h> +#include <vcl/vcom.h> +#include <sys/time.h> + +#include <vcl/vppcom.h> +#include <vcl/vcom_socket.h> + +/* GCC have printf type attribute check. 
*/ +#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT +#define PRINTF_ATTRIBUTE(a,b) \ + __attribute__ ((__format__ (__printf__, a, b))) +#else +#define PRINTF_ATTRIBUTE(a,b) +#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */ + +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE \ + __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ + +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE \ + __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif + +#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \ + __attribute__((no_sanitize_address)) +#else +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE +#endif + +#define VCOM_SOCKET_FD_MAX 0x10000 + +static char vcom_app_name[MAX_VCOM_APP_NAME]; + +/* + * RETURN: 0 on success or -1 on error. + * */ +int +vcom_set_app_name (char *__app_name) +{ + return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d", + __app_name, getpid ()) < 0 ? 
-1 : 0; +} + +static char * +vcom_get_app_name () +{ + if (vcom_app_name[0] == '\0') + { + snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ()); + } + return vcom_app_name; +} + +/* + * 1 if init, 0 otherwise + */ +static int is_vcom_init; + +/* + * TBD: Make it thread safe + */ + +/* + * constructor function called before main is called + * RETURN: 0 on success -1 on failure + * */ +static inline int +vcom_init () +{ + pid_t pid = getpid (); + + if (!is_vcom_init) + { + if (vppcom_app_create (vcom_get_app_name ()) != 0) + { + printf ("\n[%d] vcom_init...failed!\n", pid); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_init: vppcom_app_create failed!\n", pid); + return -1; + } + if (vcom_socket_main_init () != 0) + { + printf ("\n[%d] vcom_init...failed!\n", pid); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_init: vcom_socket_main_init failed!\n", pid); + return -1; + } + + is_vcom_init = 1; + printf ("\n[%d] vcom_init...done!\n", pid); + } + return 0; +} + +static inline void +vcom_destroy (void) +{ + pid_t pid = getpid (); + + if (is_vcom_init) + { + vcom_socket_main_destroy (); + vppcom_app_destroy (); + is_vcom_init = 0; + fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid); + } +} + +static inline int +is_vcom_socket_fd (int fd) +{ + return vcom_socket_is_vcom_fd (fd); +} + +static inline int +is_vcom_epfd (int epfd) +{ + return vcom_socket_is_vcom_epfd (epfd); +} + + +/* + * + * Generic glibc fd api + * + */ + +/* Close the file descriptor FD. + + This function is a cancellation point and therefore + not marked with __THROW. */ +/* + * PRE: is_vcom_socket_fd(__fd) == 1 + * RETURN: 0 on success and -1 for errors. 
+ * */ +int +vcom_close (int __fd) +{ + if (vcom_init () != 0) + { + return -1; + } + + if (vcom_socket_close (__fd) != 0) + { + return -1; + } + + return 0; +} + +/* + * RETURN: 0 on success, or -1 on error + */ +int +close (int __fd) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_close (__fd); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] close: " "'%04d'='%04d'\n", pid, rv, __fd); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_close (__fd); +} + +/* Read NBYTES into BUF from FD. Return the + number read, -1 for errors or 0 for EOF. + + This function is a cancellation point and therefore + not marked with __THROW. */ +ssize_t +vcom_read (int __fd, void *__buf, size_t __nbytes) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_read (__fd, __buf, __nbytes); +} + +ssize_t +read (int __fd, void *__buf, size_t __nbytes) +{ + ssize_t size = 0; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d][%lu (0x%lx)] read:1 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __nbytes); + size = vcom_read (__fd, __buf, __nbytes); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d][%lu (0x%lx)] read:2 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __nbytes); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_read (__fd, __buf, __nbytes); +} + +ssize_t +vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_readv (__fd, __iov, __iovcnt); +} + +ssize_t +readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + ssize_t 
size = 0; + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_readv (__fd, __iov, __iovcnt); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + else + return libc_readv (__fd, __iov, __iovcnt); +} + +/* Write N bytes of BUF to FD. Return the number written, or -1. + + This function is a cancellation point and therefore + not marked with __THROW. */ +ssize_t +vcom_write (int __fd, const void *__buf, size_t __n) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_write (__fd, (void *) __buf, __n); +} + +ssize_t +write (int __fd, const void *__buf, size_t __n) +{ + ssize_t size = 0; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d][%lu (0x%lx)] write:1 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __n); + size = vcom_write (__fd, __buf, __n); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d][%lu (0x%lx)] write:2 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __n); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_write (__fd, __buf, __n); +} + +ssize_t +vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_writev (__fd, __iov, __iovcnt); +} + +ssize_t +writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + ssize_t size = 0; + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_writev (__fd, __iov, __iovcnt); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + else + return libc_writev (__fd, __iov, __iovcnt); +} + +/* Do the file control operation described by CMD on FD. + The remaining arguments are interpreted depending on CMD. + + This function is a cancellation point and therefore + not marked with __THROW. 
*/ +int +vcom_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_fcntl_va (__fd, __cmd, __ap); +} + +int +vcom_fcntl (int __fd, int __cmd, ...) +{ + int rv = -1; + va_list ap; + + if (is_vcom_socket_fd (__fd)) + { + va_start (ap, __cmd); + rv = vcom_fcntl_va (__fd, __cmd, ap); + va_end (ap); + } + return rv; +} + +int +fcntl (int __fd, int __cmd, ...) +{ + int rv; + va_list ap; + pid_t pid = getpid (); + + va_start (ap, __cmd); + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_fcntl_va (__fd, __cmd, ap); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] fcntl: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd); + if (rv < 0) + { + errno = -rv; + rv = -1; + } + goto out; + } + rv = libc_vfcntl (__fd, __cmd, ap); + +out: + va_end (ap); + return rv; +} + +int +vcom_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_ioctl_va (__fd, __cmd, __ap); +} + +int +vcom_ioctl (int __fd, unsigned long int __cmd, ...) +{ + int rv = -1; + va_list ap; + + if (is_vcom_socket_fd (__fd)) + { + va_start (ap, __cmd); + rv = vcom_ioctl_va (__fd, __cmd, ap); + va_end (ap); + } + return rv; +} + +int +ioctl (int __fd, unsigned long int __cmd, ...) +{ + int rv; + va_list ap; + pid_t pid = getpid (); + + va_start (ap, __cmd); + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_ioctl_va (__fd, __cmd, ap); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] ioctl: " + "'%04d'='%04d', '%04ld'\n", pid, rv, __fd, __cmd); + if (rv < 0) + { + errno = -rv; + rv = -1; + } + goto out; + } + rv = libc_vioctl (__fd, __cmd, ap); + +out: + va_end (ap); + return rv; +} + +/* + * Check the first NFDS descriptors each in READFDS (if not NULL) for + * read readiness, in WRITEFDS (if not NULL) for write readiness, + * and in EXCEPTFDS (if not NULL) for exceptional conditions. + * If TIMEOUT is not NULL, time out after waiting the interval + * specified therein. 
Returns the number of ready descriptors, + * or -1 for errors. + * + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ + +/* + * clear all vcom FDs from fd_sets __readfds, __writefds and + * __exceptfds and update the new nfds + * + * new nfds is the highest-numbered file descriptor + * in any of the three sets, plus 1 + * + * Return the number of file descriptors contained in the + * three descriptor sets. ie. the total number of the bits + * that are set in __readfds, __writefds and __exceptfds + */ +static inline int +vcom_fd_clear (int __nfds, + int *__new_nfds, + fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* clear all vcom fd from the sets */ + for (fd = 0; fd < __nfds; fd++) + { + + /* clear vcom fd from set */ + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + FD_CLR (fd, (F)); \ + } \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + /* + * compute nfd and __new_nfds + */ + for (fd = 0; fd < __nfds; fd++) + { + + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + *__new_nfds = max_fd != -1 ? max_fd + 1 : 0; + return nfd; +} + +/* + * Return the number of file descriptors contained in the + * three descriptor sets. ie. 
the total number of the bits + * that are set in __readfds, __writefds and __exceptfds + */ +static inline int +vcom_fd_set (int __nfds, + /* dest */ + int *__new_nfds, + fd_set * __restrict __readfds, + fd_set * __restrict __writefds, fd_set * __restrict __exceptfds, + /* src */ + fd_set * __restrict __saved_readfds, + fd_set * __restrict __saved_writefds, + fd_set * __restrict __saved_exceptfds) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + for (fd = 0; fd < __nfds; fd++) + { + /* + * F fd set + * S saved fd set + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (S))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + FD_SET (fd, (F)); \ + } \ + } + + + _(__saved_readfds, __readfds); + _(__saved_writefds, __writefds); +#undef _ + } + + + /* + * compute nfd and __new_nfds + */ + for (fd = 0; fd < __nfds; fd++) + { + + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + *__new_nfds = max_fd != -1 ? 
max_fd + 1 : 0; + return nfd; +} + +/* + * split select sets(src) into + * vcom sets(dest1) and libc sets(dest2) + */ +static inline void +vcom_fd_set_split ( + /* src, select sets */ + int nfds, + fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, + /* dest1, vcom sets */ + int *vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, int *vcom_nfd, + /* dest2, libc sets */ + int *libc_nfds, + fd_set * __restrict libc_readfds, + fd_set * __restrict libc_writefds, + fd_set * __restrict libc_exceptfds, int *libc_nfd) +{ + int fd; + + /* vcom */ + /* invalid max_fd is -1 */ + int vcom_max_fd = -1; + int vcom_nfd2 = 0; + + /* libc */ + /* invalid max_fd is -1 */ + int libc_max_fd = -1; + int libc_nfd2 = 0; + + + for (fd = 0; fd < nfds; fd++) + { + /* + * S select fd set + * V vcom fd set + * L libc fd set + */ +#define _(S,V,L) \ + if ((S) && FD_ISSET (fd, (S))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + if ((V)) \ + { \ + FD_SET(fd, (V)); \ + if (fd > vcom_max_fd) \ + { \ + vcom_max_fd = fd; \ + } \ + ++vcom_nfd2; \ + } \ + } \ + else \ + { \ + if ((L)) \ + { \ + FD_SET(fd, (L)); \ + if (fd > libc_max_fd) \ + { \ + libc_max_fd = fd; \ + } \ + ++libc_nfd2; \ + } \ + } \ + } + + + _(readfds, vcom_readfds, libc_readfds); + _(writefds, vcom_writefds, libc_writefds); + _(exceptfds, vcom_exceptfds, libc_exceptfds); +#undef _ + } + + if (vcom_nfds) + *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0; + if (vcom_nfd) + *vcom_nfd = vcom_nfd2; + if (libc_nfds) + *libc_nfds = libc_max_fd != -1 ? 
libc_max_fd + 1 : 0; + if (libc_nfd) + *libc_nfd = libc_nfd2; +} + +/* + * merge vcom sets(src1) and libc sets(src2) + * into select sets(dest) + */ +static inline void +vcom_fd_set_merge ( + /* dest, select sets */ + int *nfds, + fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, int *nfd, + /* src1, vcom sets */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, int vcom_nfd, + /* src2, libc sets */ + int libc_nfds, + fd_set * __restrict libc_readfds, + fd_set * __restrict libc_writefds, + fd_set * __restrict libc_exceptfds, int libc_nfd) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd2 = 0; + + + /* FD_BIT_OR + * + * dest |= src at current bit index + * update MAX and NFD of dest fd set + * + * + * FS source fd set + * FD dest fd set + * BI bit index + * MAX current max_fd of dest fd sets + * NFD current nfd of dest fd sets + * N nfds of source fd set + */ +#define FD_BIT_OR(FD,FS,BI, \ + MAX,NFD) \ + if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \ + { \ + FD_SET ((BI), (FD)); \ + if ((BI) > (MAX)) \ + { \ + (MAX) = (BI); \ + } \ + ++(NFD); \ + } + + + /* FD_RWE_SET_OR */ + /* + * SR,SW,SE source RWE fd sets + * DR,DW,DE dest RWE fd sets + * BI bit index + * NFDS nfds of source fd sets + * MAX current max_fd of dest fd sets + * NFD current nfd of dest fd sets + */ +#define FD_RWE_SETS_OR(DR,DW,DE, \ + SR,SW,SE, \ + BI,NFDS, \ + MAX,NFD) \ + do \ + { \ + for ((BI) = 0; (BI) < (NFDS); (BI)++) \ + { \ + FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \ + FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \ + FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \ + } \ + } \ + while (0); + + + /* source(vcom) to dest(select) rwe fd sets */ + FD_RWE_SETS_OR (readfds, writefds, exceptfds, + vcom_readfds, vcom_writefds, vcom_exceptfds, + fd, vcom_nfds, max_fd, nfd2); + + /* source(libc) to dest(select) rwe fd sets */ + FD_RWE_SETS_OR (readfds, writefds, 
exceptfds, + libc_readfds, libc_writefds, libc_exceptfds, + fd, libc_nfds, max_fd, nfd2); + +#undef FD_RWE_SETS_OR +#undef FD_BIT_OR + + if (nfds) + *nfds = max_fd != -1 ? max_fd + 1 : 0; + if (nfd) + *nfd = nfd2; +} + +/* + * RETURN 1 if fds is NULL or empty. 0 otherwise + */ +static inline int +fd_set_iszero (fd_set * __restrict fds) +{ + int fd; + + /* NULL fds */ + if (!fds) + return 1; + + for (fd = 0; fd < FD_SETSIZE; fd++) + { + if (FD_ISSET (fd, fds)) + { + /* non-empty fds */ + return 0; + } + } + /* empty fds */ + return 1; +} + + +/* + * ################ + * kernel time64.h + * ################ + * */ +typedef long int s64; +typedef unsigned long int u64; + +typedef long long int __s64; +typedef unsigned long long int __u64; + +typedef __s64 time64_t; +typedef __u64 timeu64_t; + +/* Parameters used to convert the timespec values: */ +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + + +/* + * ################ + * kernel time.h + * ################ + * */ + + +#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) + +#ifdef VCOM_USE_TIMESPEC_EQUAL +static inline int +timespec_equal (const struct timespec *a, const struct timespec *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} +#endif + +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int +timespec_compare (const struct timespec *lhs, const struct timespec *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + +#ifdef VCOM_USE_TIMEVAL_COMPARE +static inline int +timeval_compare (const struct timeval *lhs, const struct timeval *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_usec - 
rhs->tv_usec; +} +#endif + +extern void set_normalized_timespec (struct timespec *ts, time_t sec, + s64 nsec); + +static inline struct timespec +timespec_add (struct timespec lhs, struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec (&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + +/* + * sub = lhs - rhs, in normalized form + */ +static inline struct timespec +timespec_sub (struct timespec lhs, struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec (&ts_delta, lhs.tv_sec - rhs.tv_sec, + lhs.tv_nsec - rhs.tv_nsec); + return ts_delta; +} + +/* + * ################ + * kernel time.c + * ################ + * */ + + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void +set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec) +{ + while (nsec >= NSEC_PER_SEC) + { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm ("":"+rm" (nsec)); + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) + { + asm ("":"+rm" (nsec)); + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +#define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0)) + +/* Macros for converting between `struct timeval' and `struct timespec'. 
*/ +#define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) { \ + (ts)->tv_sec = (tv)->tv_sec; \ + (ts)->tv_nsec = (tv)->tv_usec * 1000; \ +} +#define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) { \ + (tv)->tv_sec = (ts)->tv_sec; \ + (tv)->tv_usec = (ts)->tv_nsec / 1000; \ +} + +static inline int +vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + return vcom_socket_select (vcom_nfds, vcom_readfds, + vcom_writefds, vcom_exceptfds, timeout); +} + +int +vcom_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + int rv; + int rv2 = 0; + pid_t pid = getpid (); + + int timedout = 0; + /* block indefinitely */ + int no_timeout = 0; + int first_clock_gettime_failed = 0; + /* timeout value in units of timespec */ + struct timespec timeout_ts; + struct timespec start_time, now, end_time; + + /* select sets attributes - after merge */ + int new_nfds = 0; + int new_nfd = -1; + + /* vcom */ + int vcom_nfds = 0; + fd_set vcom_readfds; + fd_set vcom_writefds; + fd_set vcom_exceptfds; + int vcom_nfd = -1; + + /* libc */ + int libc_nfds = 0; + fd_set libc_readfds; + fd_set libc_writefds; + fd_set libc_exceptfds; + int libc_nfd = -1; + + /* for polling */ + struct timeval tv = {.tv_sec = 0,.tv_usec = 0 }; + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (!vcom_timerisvalid (__timeout)) + { + rv = -EINVAL; + goto select_done; + } + + /* validate tv_usec */ + /* TBD: */ + /* init timeout_ts */ + VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts); + set_normalized_timespec (&timeout_ts, + timeout_ts.tv_sec, timeout_ts.tv_nsec); + } + + rv = clock_gettime (CLOCK_MONOTONIC, &start_time); + if (rv == -1) + { + rv = -errno; + first_clock_gettime_failed = 1; + goto select_done; + } + + /* init end_time */ + if (__timeout) + { + if 
(timerisset (__timeout)) + { + end_time = timespec_add (start_time, timeout_ts); + } + else + { + /* + * if both fields of the timeout structure are zero, + * then select returns immediately + * */ + end_time = start_time; + } + } + else + { + /* block indefinitely */ + no_timeout = 1; + } + + + + if (vcom_init () != 0) + { + rv = -1; + goto select_done; + } + + /* validate __nfds */ + if (__nfds < 0 || __nfds > FD_SETSIZE) + { + rv = -EINVAL; + goto select_done; + } + + + /* + * usleep(3) emulation + * */ + + /* call libc_select() with a finite timeout and + * no file descriptors or empty fd sets and + * zero nfds */ + if (__nfds == 0 && + (!__readfds || fd_set_iszero (__readfds)) && + (!__writefds || fd_set_iszero (__writefds)) && + (!__exceptfds || fd_set_iszero (__exceptfds))) + { + if (__timeout) + { + rv = libc_select (__nfds, + __readfds, __writefds, __exceptfds, __timeout); + if (rv == -1) + rv = -errno; + } + else + { + /* TBD: block indefinitely or return -EINVAL */ + rv = -EINVAL; + } + goto select_done; + } + + /* init once before the polling loop */ + + /* zero vcom and libc fd sets */ + /* + * S select fd set + * V vcom fd set + * L libc fd set + */ +#define _(S,V,L) \ + if ((S)) \ + { \ + FD_ZERO ((V)); \ + FD_ZERO ((L)); \ + } + + + _(__readfds, &vcom_readfds, &libc_readfds); + _(__writefds, &vcom_writefds, &libc_writefds); + _(__exceptfds, &vcom_exceptfds, &libc_exceptfds); +#undef _ + new_nfds = 0; + new_nfd = -1; + + vcom_nfds = 0; + vcom_nfd = -1; + libc_nfds = 0; + libc_nfd = -1; + + vcom_fd_set_split ( + /* src, select sets */ + __nfds, __readfds, __writefds, __exceptfds, + /* dest1, vcom sets */ + __readfds || __writefds || __exceptfds ? + &vcom_nfds : NULL, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, + __readfds || __writefds || __exceptfds ? + &vcom_nfd : NULL, + /* dest2, libc sets */ + __readfds || __writefds || __exceptfds ? + &libc_nfds : NULL, + __readfds ? 
&libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, + __readfds || __writefds || __exceptfds ? + &libc_nfd : NULL); + + /* + * polling loop + * */ + do + { + new_nfd = -1; + vcom_nfd = -1; + libc_nfd = -1; + + /* + * if both fields of timeval structure are zero, + * vcom_select_impl and libc_select returns immediately. + * useful for polling and ensure fairness among + * file descriptors watched. + */ + + /* for polling */ + tv.tv_sec = 0; + tv.tv_usec = 0; + + /* select on vcom fds */ + if (vcom_nfds) + { + vcom_nfd = vcom_select_impl (vcom_nfds, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, + &tv); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] select vcom: " + "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds); + + if (vcom_nfd < 0) + { + rv = vcom_nfd; + goto select_done; + } + } + /* select on libc fds */ + if (libc_nfds) + { + libc_nfd = libc_select (libc_nfds, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, &tv); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] select libc: " + "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds); + + if (libc_nfd < 0) + { + /* tv becomes undefined */ + libc_nfd = errno; + rv = libc_nfd; + goto select_done; + } + } + + /* check if any file descriptors changed status */ + if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0)) + { + /* zero the sets before merge and exit */ + + /* + * F fd set + */ +#define _(F) \ + if ((F)) \ + { \ + FD_ZERO ((F)); \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + new_nfds = 0; + new_nfd = -1; + + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + vcom_fd_set_merge ( + /* dest, select sets */ + __readfds || __writefds || __exceptfds ? 
+ &new_nfds : NULL, + __readfds, + __writefds, + __exceptfds, + __readfds || __writefds || __exceptfds ? + &new_nfd : NULL, + /* src1, vcom sets */ + vcom_nfds, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd, + /* src2, libc sets */ + libc_nfds, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, libc_nfd); + /* + * return the number of file descriptors contained in the + * three returned sets + * */ + rv = 0; + /* + * for documentation + * + * if(vcom_nfd > 0) + * rv += vcom_nfd; + * if(libc_nfd > 0) + * rv += libc_nfd; + */ + + rv = new_nfd == -1 ? 0 : new_nfd; + goto select_done; + } + + rv = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv == -1) + { + rv = -errno; + goto select_done; + } + } + while (no_timeout || timespec_compare (&now, &end_time) < 0); + + /* timeout expired before anything interesting happened */ + timedout = 1; + rv = 0; + +select_done: + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds); + /* + * modify timeout parameter to reflect the amount of time not slept + * */ + if (__timeout) + { + if (vcom_timerisvalid (__timeout)) + { + /* timeout expired */ + if (timedout) + { + timerclear (__timeout); + } + else if (!first_clock_gettime_failed) + { + rv2 = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv2 == -1) + { + rv = -errno; + } + else + { + struct timespec ts_delta; + ts_delta = timespec_sub (end_time, now); + VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta); + } + } + } + } + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds); + + return rv; +} + +int +vcom_select_internal (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + int rv; + int new_nfds = 0; + int nfd = 0; + pid_t pid = getpid (); + + fd_set 
saved_readfds; + fd_set saved_writefds; + fd_set saved_exceptfds; + + /* validate __nfds */ + if (__nfds < 0) + { + errno = EINVAL; + return -1; + } + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0) + { + errno = EINVAL; + return -1; + } + + /* validate tv_usec */ + /* TBD: */ + } + + /* init saved_x fds */ + if (__readfds) + { + saved_readfds = *__readfds; + /* + memcpy (&saved_readfds, __readfds, sizeof (*__readfds)); + */ + } + else + { + FD_ZERO (&saved_readfds); + } + + if (__writefds) + { + saved_writefds = *__writefds; + /* + memcpy (&saved_writefds, __writefds, sizeof (*__writefds)); + */ + + } + else + { + FD_ZERO (&saved_writefds); + } + + if (__exceptfds) + { + saved_exceptfds = *__exceptfds; + /* + memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds)); + */ + + } + else + { + FD_ZERO (&saved_exceptfds); + } + + /* clear vcom fds */ + nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds); + + /* set to an invalid value */ + rv = -2; + /* have kernel fds */ + if (new_nfds) + rv = libc_select (new_nfds, __readfds, + __writefds, __exceptfds, __timeout); + + if (new_nfds && rv == -1) + { + /* on error, the file descriptor sets are unmodified */ + if (__readfds) + *__readfds = saved_readfds; + if (__writefds) + *__writefds = saved_writefds; + if (__exceptfds) + *__exceptfds = saved_exceptfds; + return rv; + } + else if ((new_nfds && rv != -1) || (rv == -2)) + { + /* restore vcom fds */ + nfd = vcom_fd_set (__nfds, + &new_nfds, + __readfds, + __writefds, + __exceptfds, + &saved_readfds, &saved_writefds, &saved_exceptfds); + rv = nfd; + } + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds); + return rv; +} + +int +select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout) +{ + int rv = 0; + pid_t pid 
= getpid (); + + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds); + rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout); + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +#ifdef __USE_XOPEN2K +/* + * Same as above only that the TIMEOUT value is given with higher + * resolution and a sigmask which is been set temporarily. This + * version should be used. + * + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + int fd; + int vcom_nfds = 0; + + for (fd = 0; fd < __nfds; fd++) + { + if (__readfds && FD_ISSET (fd, __readfds)) + { + if (is_vcom_socket_fd (fd)) + { + vcom_nfds++; + } + } + + if (__writefds && FD_ISSET (fd, __writefds)) + { + if (is_vcom_socket_fd (fd)) + { + vcom_nfds++; + } + } + if (__exceptfds && FD_ISSET (fd, __exceptfds)) + { + if (is_vcom_socket_fd (fd)) + { + FD_CLR (fd, __exceptfds); + } + } + } + return vcom_nfds; +} + +int +pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + int rv; + int new_nfds = 0; + int nfd = 0; + pid_t pid = getpid (); + + fd_set saved_readfds; + fd_set saved_writefds; + fd_set saved_exceptfds; + + /* validate __nfds */ + if (__nfds < 0) + { + errno = EINVAL; + return -1; + } + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0) + { + errno = EINVAL; + return -1; + } + + /* validate tv_usec */ + /* TBD: */ + } + + /* init saved fds */ + 
if (__readfds) + { + saved_readfds = *__readfds; + /* + memcpy (&saved_readfds, __readfds, sizeof (*__readfds)); + */ + } + else + { + FD_ZERO (&saved_readfds); + } + + if (__writefds) + { + saved_writefds = *__writefds; + /* + memcpy (&saved_writefds, __writefds, sizeof (*__writefds)); + */ + + } + else + { + FD_ZERO (&saved_writefds); + } + + if (__exceptfds) + { + saved_exceptfds = *__exceptfds; + /* + memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds)); + */ + + } + else + { + FD_ZERO (&saved_exceptfds); + } + + /* clear vcom fds */ + nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds); + + /* set to an invalid value */ + rv = -2; + if (new_nfds) + rv = libc_pselect (new_nfds, + __readfds, + __writefds, __exceptfds, __timeout, __sigmask); + + if (new_nfds && rv == -1) + { + /* on error, the file descriptor sets are unmodified */ + if (__readfds) + *__readfds = saved_readfds; + if (__writefds) + *__writefds = saved_writefds; + if (__exceptfds) + *__exceptfds = saved_exceptfds; + return rv; + } + else if ((new_nfds && rv != -1) || (rv == -2)) + { + /* restore vcom fds */ + nfd = vcom_fd_set (__nfds, + &new_nfds, + __readfds, + __writefds, + __exceptfds, + &saved_readfds, &saved_writefds, &saved_exceptfds); + rv = nfd; + } + + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds); + return rv; +} +#endif + +/* + * + * Socket specific glibc api + * + */ + +/* Create a new socket of type TYPE in domain DOMAIN, using + * protocol PROTOCOL. If PROTOCOL is zero, one is chosen + * automatically. Returns a file descriptor for the new socket, + * or -1 for errors. + * RETURN: a valid file descriptor for the new socket, + * or -1 for errors. 
+ * */ + +int +vcom_socket (int __domain, int __type, int __protocol) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_socket (__domain, __type, __protocol); +} + +int +socket (int __domain, int __type, int __protocol) +{ + int rv; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + /* handle domains implemented by vpp */ + switch (__domain) + { + case AF_INET: + case AF_INET6: + /* handle types implemented by vpp */ + switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_socket (__domain, __type, __protocol); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] socket: " + "'%04d'= D='%04d', T='%04d', P='%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __domain, __type, __protocol); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + +CALL_GLIBC_SOCKET_API: + return libc_socket (__domain, __type, __protocol); +} + +/* + * Create two new sockets, of type TYPE in domain DOMAIN and using + * protocol PROTOCOL, which are connected to each other, and put file + * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + * one will be chosen automatically. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_socketpair (__domain, __type, __protocol, __fds); +} + +int +socketpair (int __domain, int __type, int __protocol, int __fds[2]) +{ + int rv; + pid_t pid = getpid (); + + /* handle domains implemented by vpp */ + switch (__domain) + { + case AF_INET: + case AF_INET6: + /* handle types implemented by vpp */ + switch (__type) + { + case SOCK_STREAM: + case SOCK_DGRAM: + rv = vcom_socketpair (__domain, __type, __protocol, __fds); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] socketpair: " + "'%04d'= D='%04d', T='%04d', P='%04d'\n", + pid, rv, __domain, __type, __protocol); + if (rv < 0) + { + errno = -rv; + return -1; + } + return 0; + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + +CALL_GLIBC_SOCKET_API: + return libc_socketpair (__domain, __type, __protocol, __fds); +} + +/* + * Give the socket FD the local address ADDR + * (which is LEN bytes long). 
+ * */ +int +vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + + if (vcom_init () != 0) + { + return -1; + } + + /* validate __len */ + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + return -EINVAL; + break; + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + return -EINVAL; + break; + + default: + return -1; + break; + } + + /* handle domains implemented by vpp */ + switch (__addr->sa_family) + { + case AF_INET: + case AF_INET6: + rv = vcom_socket_bind (__fd, __addr, __len); + return rv; + break; + + default: + return -1; + break; + } + + return -1; +} + +int +bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + + rv = vcom_bind (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] bind: " + "'%04d'='%04d', '%p', '%04d'\n", + pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_bind (__fd, __addr, __len); +} + +/* + * Put the local address of FD into *ADDR and its length in *LEN. + * */ +int +vcom_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getsockname (__fd, __addr, __len); +} + +int +getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getsockname (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getsockname: " + "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getsockname (__fd, __addr, __len); +} + +/* + * Open a connection on socket FD to peer at ADDR + * (which LEN bytes long). 
For connectionless socket types, just set + * the default address to send to and the only address from which to + * accept transmissions. Return 0 on success, -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + + if (vcom_init () != 0) + { + return -1; + } + + /* validate __len */ + switch (__addr->sa_family) + { + case AF_INET: + if (__len != INET_ADDRSTRLEN) + return -1; + break; + case AF_INET6: + if (__len != INET6_ADDRSTRLEN) + return -1; + break; + + default: + return -1; + break; + } + + /* handle domains implemented by vpp */ + switch (__addr->sa_family) + { + case AF_INET: + case AF_INET6: + rv = vcom_socket_connect (__fd, __addr, __len); + break; + + default: + return -1; + break; + } + + return rv; +} + +int +connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_connect (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] connect: " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + + return libc_connect (__fd, __addr, __len); +} + +/* + * Put the address of the peer connected to socket FD into *ADDR + * (which is *LEN bytes long), and its actual length into *LEN. 
+ * */ +int +vcom_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getpeername (__fd, __addr, __len); +} + +int +getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getpeername (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getpeername: " + "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getpeername (__fd, __addr, __len); +} + +/* + * Send N bytes of BUF to socket FD. Returns the number sent or -1. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags); +} + +ssize_t +send (int __fd, const void *__buf, size_t __n, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_send (__fd, __buf, __n, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] send: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_send (__fd, __buf, __n, __flags); +} + +/* + * Read N bytes into BUF from socket FD. + * Returns the number read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. 
+ * */ +ssize_t +vcom_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recv (__fd, __buf, __n, __flags); +} + +ssize_t +recv (int __fd, void *__buf, size_t __n, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recv (__fd, __buf, __n, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recv: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recv (__fd, __buf, __n, __flags); +} + +/* + * Send N bytes of BUF on socket FD to peer at address ADDR (which is + * ADDR_LEN bytes long). Returns the number sent, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags, + __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +ssize_t +sendto (int __fd, const void *__buf, size_t __n, int __flags, + __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendto: " + "'%04d'='%04d', '%p', '%04d', '%04x', " + "'%p', '%04d'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags, + __addr, __addr_len); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +/* + * Read N bytes into BUF through socket FD. + * If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the + * address of the sender, and store the actual size of the address + * in *ADDR_LEN. 
+ * Returns the number of bytes read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +ssize_t +recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvfrom: " + "'%04d'='%04d', '%p', '%04d', '%04x', " + "'%p', '%p'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags, + __addr, __addr_len); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +/* + * Send a message described MESSAGE on socket FD. + * Returns the number of bytes sent, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. 
+ * */ +ssize_t +vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendmsg (__fd, __message, __flags); +} + +ssize_t +sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendmsg (__fd, __message, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendmsg: " + "'%04d'='%04d', '%p', '%04x'\n", + pid, (int) size, __fd, __message, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendmsg (__fd, __message, __flags); +} + +#ifdef __USE_GNU +/* + * Send a VLEN messages as described by VMESSAGES to socket FD. + * Returns the number of datagrams successfully written + * or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendmmsg (__fd, __message, __vlen, __flags); +} + +int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendmmsg (__fd, __message, __vlen, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendmmsg: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __vmessages, __vlen, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendmmsg (__fd, __message, __vlen, __flags); +} + +#endif + +/* + * Receive a message as described by MESSAGE from socket FD. + * Returns the number of bytes read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. 
+ * */ +ssize_t +vcom_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvmsg (__fd, __message, __flags); +} + +ssize_t +recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvmsg (__fd, __message, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvmsg: " + "'%04d'='%04d', '%p', '%04x'\n", + pid, (int) size, __fd, __message, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvmsg (__fd, __message, __flags); +} + +#ifdef __USE_GNU +/* + * Receive up to VLEN messages as described by VMESSAGES from socket FD. + * Returns the number of messages received or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvmmsg (__fd, __message, __vlen, __flags, __tmo); +} + +int +recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvmmsg (__fd, __message, __vlen, __flags, __tmo); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvmmsg: " + "'%04d'='%04d', '%p', " + "'%04d', '%04x', '%p'\n", + pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvmmsg (__fd, __message, __vlen, __flags, __tmo); +} + +#endif + +/* + * Put the current value for socket FD's option OPTNAME + * at protocol level LEVEL into OPTVAL (which is *OPTLEN bytes long), + * and set *OPTLEN to the value's actual length. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getsockopt (__fd, __level, __optname, + __optval, __optlen); +} + +int +getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] getsockopt: " + "'%04d'='%04d', '%04d', '%04d', " + "'%p', '%p'\n", + pid, rv, __fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getsockopt (__fd, __level, __optname, __optval, __optlen); +} + +/* + * Set socket FD's option OPTNAME at protocol level LEVEL + * to *OPTVAL (which is OPTLEN bytes long). + * Returns 0 on success, -1 for errors. + * */ +int +vcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_setsockopt (__fd, __level, __optname, + __optval, __optlen); +} + +int +setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] setsockopt: " + "'%04d'='%04d', '%04d', '%04d', " + "'%p', '%04d'\n", + pid, rv, __fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_setsockopt (__fd, __level, __optname, __optval, __optlen); +} + +/* + * Prepare to accept connections on socket FD. + * N connection requests will be queued before further + * requests are refused. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_listen (int __fd, int __n) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_listen (__fd, __n); +} + +int +listen (int __fd, int __n) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_listen (__fd, __n); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] listen: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_listen (__fd, __n); +} + +/* + * Await a connection on socket FD. + * When a connection arrives, open a new socket to communicate + * with it, set *ADDR (which is *ADDR_LEN bytes long) to the address + * of the connecting peer and *ADDR_LEN to the address's actual + * length, and return the new socket's descriptor, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + + if (vcom_init () != 0) + { + return -1; + } + return vcom_socket_accept (__fd, __addr, __addr_len); +} + +int +accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + int rv = -1; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] accept1: " + "'%04d'='%04d', '%p', '%p'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __addr_len); + rv = vcom_accept (__fd, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] accept2: " + "'%04d'='%04d', '%p', '%p'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __addr_len); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + return libc_accept (__fd, __addr, __addr_len); +} + +#ifdef __USE_GNU +/* + * Similar to 'accept' but takes 
an additional parameter to specify + * flags. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_accept4 (__fd, __addr, __addr_len, __flags); +} + +int +accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_accept4 (__fd, __addr, __addr_len, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] accept4: " + "'%04d'='%04d', '%p', '%p', '%04x'\n", + pid, rv, __fd, __addr, __addr_len, __flags); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + return libc_accept4 (__fd, __addr, __addr_len, __flags); +} + +#endif + +/* + * Shut down all or part of the connection open on socket FD. + * HOW determines what to shut down: + * SHUT_RD = No more receptions; + * SHUT_WR = No more transmissions; + * SHUT_RDWR = No more receptions or transmissions. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_shutdown (int __fd, int __how) +{ + if (vcom_init () != 0) + { + return -1; + } + return vcom_socket_shutdown (__fd, __how); +} + +int +shutdown (int __fd, int __how) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_shutdown (__fd, __how); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] shutdown: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_shutdown (__fd, __how); +} + +int +vcom_epoll_create (int __size) +{ + + if (vcom_init () != 0) + { + return -1; + } + + if (__size <= 0) + { + return -EINVAL; + } + + /* __size argument is ignored "thereafter" */ + return vcom_epoll_create1 (0); +} + +/* + * __size argument is ignored, but must be greater than zero + */ +int +epoll_create (int __size) +{ + int rv = 0; + pid_t pid = getpid (); + + rv = vcom_epoll_create (__size); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_create: " "'%04d'='%04d'\n", pid, rv, __size); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +int +vcom_epoll_create1 (int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + if (__flags < 0) + { + return -EINVAL; + } + if (__flags & ~EPOLL_CLOEXEC) + { + return -EINVAL; + } + /* __flags can be either zero or EPOLL_CLOEXEC */ + /* implementation */ + return vcom_socket_epoll_create1 (__flags); +} + +/* + * __flags can be either zero or EPOLL_CLOEXEC + * */ +int +epoll_create1 (int __flags) +{ + int rv = 0; + pid_t pid = getpid (); + + rv = vcom_epoll_create1 (__flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_create: " "'%04d'='%08x'\n", pid, rv, __flags); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +static inline int +ep_op_has_event (int op) +{ + return op != EPOLL_CTL_DEL; +} + +int +vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event) +{ + if (vcom_init () != 0) + { + return -1; + } + + /* + * the requested 
operation __op is not supported + * by this interface */ + if (!((__op == EPOLL_CTL_ADD) || + (__op == EPOLL_CTL_MOD) || (__op == EPOLL_CTL_DEL))) + { + return -EINVAL; + } + + /* op is ADD or MOD but event parameter is NULL */ + if ((ep_op_has_event (__op) && !__event)) + { + return -EFAULT; + } + + /* fd is same as epfd */ + /* do not permit adding an epoll file descriptor inside itself */ + if (__epfd == __fd) + { + return -EINVAL; + } + + /* implementation */ + return vcom_socket_epoll_ctl (__epfd, __op, __fd, __event); +} + +/* + * implement the controller interface for epoll + * that enables the insertion/removal/change of + * file descriptors inside the interest set. + */ +int +epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_epfd (__epfd)) + { + /* TBD: currently limiting epoll to support only vcom fds */ + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_epoll_ctl (__epfd, __op, __fd, __event); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_ctl: " + "'%04d'='%04d', '%04d', '%04d'\n", + pid, rv, __epfd, __op, __fd); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + else + { + /* + * TBD: currently epoll does not support kernel fds + * or epoll fds */ + errno = EBADF; + return -1; + } + } + else + { + /* epfd is not an epoll file descriptor */ + errno = EINVAL; + return -1; + } + return 0; +} + +int +vcom_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_epoll_pwait (__epfd, __events, __maxevents, __timeout, NULL); +} + +int +epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + int rv; + pid_t pid = getpid (); + + if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS) + { + errno = EINVAL; + return -1; + } + + if (is_vcom_epfd (__epfd)) + { + rv = vcom_epoll_wait (__epfd, __events, __maxevents, __timeout); + if 
(VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_wait: " + "'%04d'='%04d', '%p', " + "'%04d', '%04d'\n", + pid, rv, __epfd, __events, __maxevents, __timeout); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + else + { + errno = EINVAL; + return -1; + } + return 0; +} + + +int +vcom_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss) +{ + if (vcom_init () != 0) + { + return -1; + } + + /* implementation */ + return vcom_socket_epoll_pwait (__epfd, __events, + __maxevents, __timeout, __ss); +} + +int +epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss) +{ + int rv; + pid_t pid = getpid (); + + if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS) + { + errno = EINVAL; + return -1; + } + + if (is_vcom_epfd (__epfd)) + { + rv = vcom_epoll_pwait (__epfd, __events, __maxevents, __timeout, __ss); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_pwait: " + "'%04d'='%04d', '%p', " + "'%04d', '%04d', " + "'%p'\n", + pid, rv, __epfd, __events, __maxevents, __timeout, __ss); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + else + { + errno = EINVAL; + return -1; + } + + return 0; +} + +/* Poll the file descriptors described by the NFDS structures starting at + FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for + an event to occur; if TIMEOUT is -1, block until an event occurs. + Returns the number of file descriptors with events, zero if timed out, + or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. 
*/ + +int +vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + int rv = 0; + pid_t pid = getpid (); + + struct rlimit nofile_limit; + struct pollfd vcom_fds[MAX_POLL_NFDS_DEFAULT]; + nfds_t fds_idx = 0; + + /* actual set of file descriptors to be monitored */ + nfds_t libc_nfds = 0; + nfds_t vcom_nfds = 0; + + /* ready file descriptors + * + * number of structures which have nonzero revents fields + * in other words, descriptors with events or errors reported. + * */ + /* after call to libc_poll () */ + int rlibc_nfds = 0; + /* after call to vcom_socket_poll () */ + int rvcom_nfds = 0; + + + /* timeout value in units of timespec */ + struct timespec timeout_ts; + struct timespec start_time, now, end_time; + + + /* get start_time */ + rv = clock_gettime (CLOCK_MONOTONIC, &start_time); + if (rv == -1) + { + rv = -errno; + goto poll_done; + } + + /* set timeout_ts & end_time */ + if (__timeout >= 0) + { + /* set timeout_ts */ + timeout_ts.tv_sec = __timeout / MSEC_PER_SEC; + timeout_ts.tv_nsec = (__timeout % MSEC_PER_SEC) * NSEC_PER_MSEC; + set_normalized_timespec (&timeout_ts, + timeout_ts.tv_sec, timeout_ts.tv_nsec); + /* set end_time */ + if (__timeout) + { + end_time = timespec_add (start_time, timeout_ts); + } + else + { + end_time = start_time; + } + } + + if (vcom_init () != 0) + { + rv = -1; + goto poll_done; + } + + /* validate __fds */ + if (!__fds) + { + rv = -EFAULT; + goto poll_done; + } + + /* validate __nfds */ + /*TBD: call getrlimit once when vcl-ldpreload library is init */ + rv = getrlimit (RLIMIT_NOFILE, &nofile_limit); + if (rv != 0) + { + rv = -errno; + goto poll_done; + } + if (__nfds >= nofile_limit.rlim_cur) + { + rv = -EINVAL; + goto poll_done; + } + + /* + * for the POC, it's fair to assume that nfds is less than 1024 + * */ + if (__nfds >= MAX_POLL_NFDS_DEFAULT) + { + rv = -EINVAL; + goto poll_done; + } + + /* set revents field (output parameter) + * to zero + * */ + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + 
__fds[fds_idx].revents = 0; + } + +#if 0 + /* set revents field (output parameter) + * to zero for user ignored fds + * */ + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* + * if negative fd, ignore events field + * and set output parameter (revents field) to zero */ + if (__fds[fds_idx].fd < 0) + { + __fds[fds_idx].revents = 0; + } + } +#endif + + /* + * 00. prepare __fds and vcom_fds for polling + * copy __fds to vcom_fds + * 01. negate all except libc fds in __fds, + * ignore user negated fds + * 02. negate all except vcom_fds in vocm fds, + * ignore user negated fds + * ignore fd 0 by setting it to negative number + * */ + memcpy (vcom_fds, __fds, sizeof (*__fds) * __nfds); + libc_nfds = 0; + vcom_nfds = 0; + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + continue; + } + + /* + * 00. ignore vcom fds in __fds + * 01. ignore libc fds in vcom_fds, + * ignore fd 0 by setting it to negative number. + * as fd 0 cannot be ignored. 
+ */ + if (is_vcom_socket_fd (__fds[fds_idx].fd) || + is_vcom_epfd (__fds[fds_idx].fd)) + { + __fds[fds_idx].fd = -__fds[fds_idx].fd; + vcom_nfds++; + } + else + { + libc_nfds++; + /* ignore fd 0 by setting it to negative number */ + if (!vcom_fds[fds_idx].fd) + { + vcom_fds[fds_idx].fd = -1; + } + vcom_fds[fds_idx].fd = -vcom_fds[fds_idx].fd; + } + } + + /* + * polling loop + * + * poll on libc fds and vcom fds + * + * specifying a timeout of zero causes libc_poll() and + * vcom_socket_poll() to return immediately, even if no + * file descriptors are ready + * */ + do + { + rlibc_nfds = 0; + rvcom_nfds = 0; + + /* + * timeout parameter for libc_poll () set to zero + * to poll on libc fds + * */ + + /* poll on libc fds */ + if (libc_nfds) + { + /* + * a timeout of zero causes libc_poll() + * to return immediately + * */ + rlibc_nfds = libc_poll (__fds, __nfds, 0); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] poll libc: " + "'%04d'='%08lu'\n", pid, rlibc_nfds, __nfds); + + if (rlibc_nfds < 0) + { + rv = -errno; + goto poll_done_update_nfds; + } + } + + /* + * timeout parameter for vcom_socket_poll () set to zero + * to poll on vcom fds + * */ + + /* poll on vcom fds */ + if (vcom_nfds) + { + /* + * a timeout of zero causes vcom_socket_poll() + * to return immediately + * */ + rvcom_nfds = vcom_socket_poll (vcom_fds, __nfds, 0); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] poll vcom: " + "'%04d'='%08lu'\n", pid, rvcom_nfds, __nfds); + if (rvcom_nfds < 0) + { + rv = rvcom_nfds; + goto poll_done_update_nfds; + } + } + + /* check if any file descriptors changed status */ + if ((libc_nfds && rlibc_nfds > 0) || (vcom_nfds && rvcom_nfds > 0)) + { + /* something interesting happened */ + rv = rlibc_nfds + rvcom_nfds; + goto poll_done_update_nfds; + } + + rv = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv == -1) + { + rv = -errno; + goto poll_done_update_nfds; + } + } + + /* block indefinitely || timeout elapsed */ + while ((__timeout < 0) || timespec_compare 
(&now, &end_time) < 0); + + /* timeout expired before anything interesting happened */ + rv = 0; + +poll_done_update_nfds: + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds in vcom_fds + * 00. user negated fds + * 01. libc fds + * */ + if (vcom_fds[fds_idx].fd < 0) + { + continue; + } + + /* from here on handle positive vcom fds */ + /* + * restore vcom fds to positive number in __fds + * and update revents in __fds with the events + * that actually occurred in vcom fds + * */ + __fds[fds_idx].fd = -__fds[fds_idx].fd; + if (rvcom_nfds) + { + __fds[fds_idx].revents = vcom_fds[fds_idx].revents; + } + } + +poll_done: + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] vpoll: " "'%04d'='%08lu'\n", pid, rv, __nfds); + return rv; +} + +/* + * 00. The field __fds[i].fd contains a file descriptor for an + * open file. + * If this field is negative, then the corresponding + * events field is ignored and the revents field returns zero. + * The field __fds[i].events is an input parameter. + * The field __fds[i].revents is an output parameter. + * 01. Specifying a negative value in timeout + * means an infinite timeout. + * Specifying a timeout of zero causes poll() to return + * immediately, even if no file descriptors are ready. + * + * NOTE: observed __nfds is less than 128 from kubecon strace files + */ + + +int +poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + int rv = 0; + pid_t pid = getpid (); + + + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] poll1: " "'%04d'='%08lu, %d, 0x%x'\n", + pid, rv, __nfds, __fds[0].fd, __fds[0].events); + rv = vcom_poll (__fds, __nfds, __timeout); + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] poll2: " "'%04d'='%08lu, %d, 0x%x'\n", + pid, rv, __nfds, __fds[0].fd, __fds[0].revents); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +#ifdef __USE_GNU +/* Like poll, but before waiting the threads signal mask is replaced + with that specified in the fourth parameter. 
For better usability, + the timeout value is specified using a TIMESPEC object. + + This function is a cancellation point and therefore not marked with + __THROW. */ +int +vcom_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + if (vcom_init () != 0) + { + return -1; + } + + return -EOPNOTSUPP; +} + +int +ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + int rv = 0; + + errno = EOPNOTSUPP; + rv = -1; + return rv; +} +#endif + +void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void); + +void DESTRUCTOR_ATTRIBUTE vcom_destructor (void); + +void +vcom_constructor (void) +{ + pid_t pid = getpid (); + + swrap_constructor (); + if (vcom_init () != 0) + { + printf ("\n[%d] vcom_constructor...failed!\n", pid); + } + else + { + printf ("\n[%d] vcom_constructor...done!\n", pid); + } +} + +/* + * This function is called when the library is unloaded + */ +void +vcom_destructor (void) +{ + pid_t pid = getpid (); + + vcom_destroy (); + swrap_destructor (); + printf ("\n[%d] vcom_destructor...done!\n", pid); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom.h b/src/vcl/vcom.h new file mode 100644 index 00000000000..da9dc5f7bf4 --- /dev/null +++ b/src/vcl/vcom.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_h +#define included_vcom_h + +/* VCOM DEBUG flag. Setting this to 1 or 0 turns off + ASSERT & other debugging code. */ +#ifndef VCOM_DEBUG +#define VCOM_DEBUG 0 +#endif + +#include <vcl/vcom_glibc_socket.h> + +#define MAX_VCOM_APP_NAME 256 + +/* Returns 0 on success or -1 on error. */ +extern int vcom_set_app_name (char *__app_name); + +/* + * + * File descriptor based APIs + * + */ + +/* + * vpp implementation of glibc APIs from <unistd.h> + */ +extern int vcom_close (int __fd); + +extern ssize_t __wur vcom_read (int __fd, void *__buf, size_t __nbytes); + +extern ssize_t __wur vcom_write (int __fd, const void *__buf, size_t __n); + +extern ssize_t __wur vcom_readv (int __fd, const struct iovec *__iov, + int __iovcnt); + +extern ssize_t __wur vcom_writev (int __fd, const struct iovec *__iov, + int __iovcnt); + +/* + * vpp implementation of glibc APIs from <fcntl.h> + */ +extern int vcom_fcntl (int __fd, int __cmd, ...); + +/* + * VPP implementation of glibc APIs ioctl + */ +extern int vcom_ioctl (int __fd, unsigned long int __cmd, ...); + +/* + * vpp implementation of glibc APIs from <sys/select.h> + */ +extern int +vcom_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +extern int +vcom_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +/* + * vpp implementation of glibc APIs from <sys/socket.h> + */ +extern int __THROW vcom_socket (int __domain, int __type, int __protocol); + +/* On Linux, the only supported domain for this call is AF_UNIX +* (or synonymously, AF_LOCAL). Most implementations have the +* same restriction. 
+* vpp does not implement AF_UNIX domain in this release. +* */ +extern int __THROW +vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]); + +extern int __THROW +vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +extern int __THROW +vcom_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +extern int +vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +extern int __THROW +vcom_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +extern ssize_t +vcom_send (int __fd, const void *__buf, size_t __n, int __flags); + +extern ssize_t vcom_recv (int __fd, void *__buf, size_t __n, int __flags); + +extern ssize_t +vcom_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len); + +extern ssize_t +vcom_recvfrom (int __fd, void *__restrict __buf, + size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +extern ssize_t +vcom_sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +extern int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +extern ssize_t vcom_recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +extern int +vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo); +#endif + +extern int __THROW +vcom_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen); + +extern int __THROW +vcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +extern int __THROW vcom_listen (int __fd, int __n); + +extern int +vcom_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +/* + * Similar to 'accept' but takes an additional parameter to specify + * flags. 
+ * */ +/* TBD: implemented later */ +extern int +vcom_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +extern int __THROW vcom_shutdown (int __fd, int __how); + +extern int __THROW vcom_epoll_create (int __size); + +extern int __THROW vcom_epoll_create1 (int __flags); + +extern int __THROW +vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event); + +extern int +vcom_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +extern int +vcom_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss); + +/* + * NOTE: observed __nfds is less than 128 from kubecon strace files + * for the POC, it's fair to assume that nfds is less than 1024. + * TBD: make it thread safe and design to scale. + * */ +#define MAX_POLL_NFDS_DEFAULT 1024 +extern int vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +extern int +vcom_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss); +#endif + + +#endif /* included_vcom_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom_glibc_socket.h b/src/vcl/vcom_glibc_socket.h new file mode 100644 index 00000000000..4eb60fb65c7 --- /dev/null +++ b/src/vcl/vcom_glibc_socket.h @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_glibc_socket_h +#define included_vcom_glibc_socket_h + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <arpa/inet.h> +#include <fcntl.h> + +#include <sys/epoll.h> + +#include <signal.h> +#include <poll.h> + +/* + * + * Generic glibc fd api + * + */ +/* + * glibc APIs from <unistd.h> + */ + +/* Close the file descriptor FD. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int close (int __fd); + +/* Read NBYTES into BUF from FD. Return the + number read, -1 for errors or 0 for EOF. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t __wur read (int __fd, void *__buf, size_t __nbytes); + +/* Write N bytes of BUF to FD. Return the number written, or -1. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t __wur write (int __fd, const void *__buf, size_t __n); + + +/* + * glibc APIs from <fcntl.h> + */ + +/* Do the file control operation described by CMD on FD. + The remaining arguments are interpreted depending on CMD. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int fcntl (int __fd, int __cmd, ...); + + +/* + * glibc APIs from <sys/select.h> + */ + +/* Check the first NFDS descriptors each in READFDS (if not NULL) for read + readiness, in WRITEFDS (if not NULL) for write readiness, and in EXCEPTFDS + (if not NULL) for exceptional conditions. If TIMEOUT is not NULL, time out + after waiting the interval specified therein. Returns the number of ready + descriptors, or -1 for errors. + + + This function is a cancellation point and therefore not marked with + __THROW. 
*/ +extern int +select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +/* Same as above, except that the TIMEOUT value is given with higher + resolution and a sigmask is supplied which is set temporarily. This version + should be used. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + + +/* + * + * Socket specific glibc api + * + */ + +/* + * glibc APIs from <sys/socket.h> + */ + +/* Create a new socket of type TYPE in domain DOMAIN, using + protocol PROTOCOL. If PROTOCOL is zero, one is chosen automatically. + Returns a file descriptor for the new socket, or -1 for errors. */ +extern int __THROW socket (int __domain, int __type, int __protocol); + +/* Create two new sockets, of type TYPE in domain DOMAIN and using + protocol PROTOCOL, which are connected to each other, and put file + descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + one will be chosen automatically. Returns 0 on success, -1 for errors. */ +extern int __THROW +socketpair (int __domain, int __type, int __protocol, int __fds[2]); + +/* Give the socket FD the local address ADDR (which is LEN bytes long). */ +extern int __THROW +bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +/* Put the local address of FD into *ADDR and its length in *LEN. */ +extern int __THROW +getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len); + +/* Open a connection on socket FD to peer at ADDR (which is LEN bytes long). + For connectionless socket types, just set the default address to send to + and the only address from which to accept transmissions. + Return 0 on success, -1 for errors.
+ + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +/* Put the address of the peer connected to socket FD into *ADDR + (which is *LEN bytes long), and its actual length into *LEN. */ +extern int __THROW +getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len); + +/* Send N bytes of BUF to socket FD. Returns the number sent or -1. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t send (int __fd, const void *__buf, size_t __n, int __flags); + +/* Read N bytes into BUF from socket FD. + Returns the number read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t recv (int __fd, void *__buf, size_t __n, int __flags); + +/* Send N bytes of BUF on socket FD to peer at address ADDR (which is + ADDR_LEN bytes long). Returns the number sent, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len); + +/* Read N bytes into BUF through socket FD. + If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the address of + the sender, and store the actual size of the address in *ADDR_LEN. + Returns the number of bytes read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +recvfrom (int __fd, void *__restrict __buf, + size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +/* Send a message described by MESSAGE on socket FD. + Returns the number of bytes sent, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW.
*/ +extern ssize_t +sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +/* Send VLEN messages as described by VMESSAGES to socket FD. + Returns the number of datagrams successfully written or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +/* Receive a message as described by MESSAGE from socket FD. + Returns the number of bytes read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +/* Receive up to VLEN messages as described by VMESSAGES from socket FD. + Returns the number of messages received or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo); +#endif + + +/* Put the current value for socket FD's option OPTNAME at protocol level LEVEL + into OPTVAL (which is *OPTLEN bytes long), and set *OPTLEN to the value's + actual length. Returns 0 on success, -1 for errors. */ +extern int __THROW +getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen); + +/* Set socket FD's option OPTNAME at protocol level LEVEL + to *OPTVAL (which is OPTLEN bytes long). + Returns 0 on success, -1 for errors. */ +extern int __THROW +setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +/* Prepare to accept connections on socket FD. + N connection requests will be queued before further requests are refused. + Returns 0 on success, -1 for errors. */ +extern int __THROW listen (int __fd, int __n); + +/* Await a connection on socket FD.
+ When a connection arrives, open a new socket to communicate with it, + set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting + peer and *ADDR_LEN to the address's actual length, and return the + new socket's descriptor, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +/* Similar to 'accept' but takes an additional parameter to specify flags. + + This function is a cancellation point and therefore not marked with + __THROW. */ + /* TBD: implemented later */ +extern int +accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +/* Shut down all or part of the connection open on socket FD. + HOW determines what to shut down: + SHUT_RD = No more receptions; + SHUT_WR = No more transmissions; + SHUT_RDWR = No more receptions or transmissions. + Returns 0 on success, -1 for errors. */ +extern int __THROW shutdown (int __fd, int __how); + + +/* + * glibc APIs from <sys/epoll.h> + */ + +/* Creates an epoll instance. Returns an fd for the new instance. + The "size" parameter is a hint specifying the number of file + descriptors to be associated with the new instance. The fd + returned by epoll_create() should be closed with close(). */ +extern int __THROW epoll_create (int __size); + +/* Same as epoll_create but with an FLAGS parameter. The unused SIZE + parameter has been dropped. */ +extern int __THROW epoll_create1 (int __flags); + +/* Manipulate an epoll instance "epfd". Returns 0 in case of success, + -1 in case of error ( the "errno" variable will contain the + specific error code ) The "op" parameter is one of the EPOLL_CTL_* + constants defined above. The "fd" parameter is the target of the + operation. The "event" parameter describes which events the caller + is interested in and any associated user data. 
*/ +extern int __THROW +epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event); + +#define EP_INT_MAX ((int)(~0U>>1)) +#define EP_MAX_EVENTS (EP_INT_MAX / sizeof(struct epoll_event)) + +/* Wait for events on an epoll instance "epfd". Returns the number of + triggered events returned in "events" buffer. Or -1 in case of + error with the "errno" variable set to the specific error code. The + "events" parameter is a buffer that will contain triggered + events. The "maxevents" is the maximum number of events to be + returned ( usually size of "events" ). The "timeout" parameter + specifies the maximum wait time in milliseconds (-1 == infinite). + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +/* Same as epoll_wait, but the thread's signal mask is temporarily + and atomically replaced with the one provided as parameter. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss); + +/* Poll the file descriptors described by the NFDS structures starting at + FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for + an event to occur; if TIMEOUT is -1, block until an event occurs. + Returns the number of file descriptors with events, zero if timed out, + or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +/* Like poll, but before waiting the threads signal mask is replaced + with that specified in the fourth parameter. For better usability, + the timeout value is specified using a TIMESPEC object. + + This function is a cancellation point and therefore not marked with + __THROW. 
*/ +extern int ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss); +#endif + + +#endif /* included_vcom_glibc_socket_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom_socket.c b/src/vcl/vcom_socket.c new file mode 100644 index 00000000000..8806ebd011a --- /dev/null +++ b/src/vcl/vcom_socket.c @@ -0,0 +1,3443 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <unistd.h> +#include <stdio.h> +#include <sys/uio.h> +#include <limits.h> +#define __need_IOV_MAX +#include <bits/stdio_lim.h> +#include <netinet/tcp.h> + +#include <vppinfra/types.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> + +#include <vcl/vcom_socket.h> +#include <vcl/vcom_socket_wrapper.h> +#include <vcl/vcom.h> + +#include <vcl/vppcom.h> + + +/* + * VCOM_SOCKET Private definitions and functions. 
+ */ + +typedef struct vcom_socket_main_t_ +{ + u8 init; + + /* vcom_socket pool */ + vcom_socket_t *vsockets; + + /* Hash table for socketidx to fd mapping */ + uword *sockidx_by_fd; + + /* vcom_epoll pool */ + vcom_epoll_t *vepolls; + + /* Hash table for epollidx to epfd mapping */ + uword *epollidx_by_epfd; + + + /* common epitem poll for all epfd */ + /* TBD: epitem poll per epfd */ + /* vcom_epitem pool */ + vcom_epitem_t *vepitems; + + /* Hash table for epitemidx to epfdfd mapping */ + uword *epitemidx_by_epfdfd; + + /* Hash table - key:epfd, value:vec of epitemidx */ + uword *epitemidxs_by_epfd; + /* Hash table - key:fd, value:vec of epitemidx */ + uword *epitemidxs_by_fd; + +} vcom_socket_main_t; + +vcom_socket_main_t vcom_socket_main; + + +static int +vcom_socket_open_socket (int domain, int type, int protocol) +{ + int rv = -1; + + /* handle domains implemented by vpp */ + switch (domain) + { + case AF_INET: + case AF_INET6: + /* get socket type and + * handle the socket types supported by vpp */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + /* the type argument serves a second purpose, + * in addition to specifying a socket type, + * it may include the bitwise OR of any of + * SOCK_NONBLOCK and SOCK_CLOEXEC, to modify + * the behavior of socket. 
*/ + rv = libc_socket (domain, type, protocol); + if (rv == -1) + rv = -errno; + break; + + default: + break; + } + + break; + + default: + break; + } + + return rv; +} + +static int +vcom_socket_open_epoll (int flags) +{ + int rv = -1; + + if (flags < 0) + { + return -EINVAL; + } + if (flags && (flags & ~EPOLL_CLOEXEC)) + { + return -EINVAL; + } + + /* flags can be either zero or EPOLL_CLOEXEC */ + rv = libc_epoll_create1 (flags); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_socket (int fd) +{ + int rv; + + rv = libc_close (fd); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_epoll (int epfd) +{ + int rv; + + rv = libc_close (epfd); + if (rv == -1) + rv = -errno; + + return rv; +} + +/* + * Public API functions + */ + + +int +vcom_socket_is_vcom_fd (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +int +vcom_socket_is_vcom_epfd (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +static inline int +vcom_socket_get_sid (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return vsock->sid; + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, 
epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return vepoll->vep_idx; + } + return INVALID_VEP_IDX; +} + +static inline int +vcom_socket_get_sid_and_vsock (int fd, vcom_socket_t ** vsockp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + *vsockp = vsock; + return vsock->sid; + } + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx_and_vepoll (int epfd, vcom_epoll_t ** vepollp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + *vepollp = vepoll; + return vepoll->vep_idx; + } + } + return INVALID_VEP_IDX; +} + + +static int +vcom_socket_close_vepoll (int epfd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + if (!p) + return -EBADF; + + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (!vepoll) + return -EBADF; + + if (vepoll->type != EPOLL_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (vepoll->count) + { + if (!vepoll->close) + { + vepoll->close = 1; + return 0; + } + else + { + return -EBADF; + } + } + + /* count is zero */ + rv = vppcom_session_close (vepoll->vep_idx); + rv = vcom_socket_close_epoll (vepoll->epfd); + + vepoll_init (vepoll); + hash_unset (vsm->epollidx_by_epfd, epfd); + pool_put (vsm->vepolls, vepoll); + + return rv; +} + +static int +vcom_socket_close_vsock (int fd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vcom_epitem_t *vepitem; + + i32 *vepitemidxs = 0; 
+ i32 *vepitemidxs_var = 0; + + p = hash_get (vsm->sockidx_by_fd, fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + rv = vppcom_session_close (vsock->sid); + rv = vcom_socket_close_socket (vsock->fd); + + vsocket_init (vsock); + hash_unset (vsm->sockidx_by_fd, fd); + pool_put (vsm->vsockets, vsock); + + /* + * NOTE: + * Before calling close(), user should remove + * this fd from the epoll-set of all epoll instances, + * otherwise resource(epitems) leaks ensues. + */ + + /* + * 00. close all epoll instances that are marked as "close" + * of which this fd is the "last" remaining member. + * 01. epitems associated with this fd are intentionally + * not removed, see NOTE: above. + * */ + + /* does this fd participate in epoll */ + p = hash_get (vsm->epitemidxs_by_fd, fd); + if (p) + { + vepitemidxs = *(i32 **) p; + vec_foreach (vepitemidxs_var, vepitemidxs) + { + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidxs_var[0]); + if (vepitem && vepitem->fd == fd && + vepitem->type == FD_TYPE_VCOM_SOCKET) + { + i32 vep_idx; + vcom_epoll_t *vepoll; + if ((vep_idx = + vcom_socket_get_vep_idx_and_vepoll (vepitem->epfd, + &vepoll)) != + INVALID_VEP_IDX) + { + if (vepoll->close) + { + if (vepoll->count == 1) + { + /* + * force count to zero and + * close this epoll instance + * */ + vepoll->count = 0; + vcom_socket_close_vepoll (vepoll->epfd); + } + else + { + vepoll->count -= 1; + } + } + } + } + + } + } + + return rv; +} + +int +vcom_socket_close (int __fd) +{ + int rv; + + if (vcom_socket_is_vcom_fd (__fd)) + { + rv = vcom_socket_close_vsock (__fd); + } + else if (vcom_socket_is_vcom_epfd (__fd)) + { + rv = vcom_socket_close_vepoll (__fd); + } + else + { + rv = -EBADF; + } + + return rv; +} + +ssize_t +vcom_socket_read (int __fd, void *__buf, size_t __nbytes) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; 
+ vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf) + { + return -EINVAL; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + } + while (rv == -EAGAIN || rv == -EWOULDBLOCK); + return rv; + } + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the read would + * block. + * */ + /* is non blocking */ + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + return rv; +} + +ssize_t +vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + ssize_t total = 0, len = 0; + int i; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + /* Sanity check */ + for (i = 0; i < __iovcnt; ++i) + { + if (SSIZE_MAX - len < __iov[i].iov_len) + return -EINVAL; + len += __iov[i].iov_len; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + for (i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + break; + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + } + while ((rv == -EAGAIN || rv == -EWOULDBLOCK) && total == 0); + return total; + } + + /* is non blocking */ + for (i = 0; i < 
__iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + { + errno = rv; + return rv; + } + } + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + return total; +} + +ssize_t +vcom_socket_write (int __fd, const void *__buf, size_t __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + if (!__buf) + { + return -EINVAL; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + rv = vppcom_session_write (vsock->sid, (void *) __buf, __n); + return rv; +} + +ssize_t +vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv = -1; + ssize_t total = 0; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + int i; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + for (i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_write (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + return rv; + } + else + total += rv; + } + return total; +} + +/* + * RETURN: 0 - invalid cmd + * 1 - cmd not handled by vcom and vppcom + * 2 - cmd handled by vcom socket resource + * 3 - cmd handled by vppcom + * */ +/* TBD: incomplete list of cmd */ +static int +vcom_socket_check_fcntl_cmd (int __cmd) +{ + switch (__cmd) + { + /*cmd not handled by vcom and vppcom */ + /* Fallthrough */ + case F_DUPFD: + case F_DUPFD_CLOEXEC: + return 1; + 
+ /* cmd handled by vcom socket resource */ + /* Fallthrough */ + case F_GETFD: + case F_SETFD: + case F_GETLK: + case F_SETLK: + case F_SETLKW: + case F_GETOWN: + case F_SETOWN: + return 2; + + /* cmd handled by vcom and vppcom */ + case F_SETFL: + case F_GETFL: + return 3; + + /* cmd not handled by vcom and vppcom */ + default: + return 1; + } + return 0; +} + +static inline int +vcom_session_fcntl_va (int __sid, int __cmd, va_list __ap) +{ + int flags = va_arg (__ap, int); + int rv = -EOPNOTSUPP; + uint32_t size; + + size = sizeof (flags); + if (__cmd == F_SETFL) + { + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + } + else if (__cmd == F_GETFL) + { + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_FLAGS, &flags, &size); + if (rv == VPPCOM_OK) + rv = flags; + } + + return rv; +} + +int +vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + int rv = -EBADF; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + switch (vcom_socket_check_fcntl_cmd (__cmd)) + { + /* invalid cmd */ + case 0: + rv = -EBADF; + break; + /*cmd not handled by vcom and vppcom */ + case 1: + rv = libc_vfcntl (vsock->fd, __cmd, __ap); + break; + /* cmd handled by vcom socket resource */ + case 2: + rv = libc_vfcntl (vsock->fd, __cmd, __ap); + break; + /* cmd handled by vppcom */ + case 3: + rv = vcom_session_fcntl_va (vsock->sid, __cmd, __ap); + break; + + default: + rv = -EINVAL; + break; + } + + return rv; +} + +/* + * RETURN: 0 - invalid cmd + * 1 - cmd not handled by vcom and vppcom + * 2 - cmd handled by vcom socket resource + * 3 - cmd handled by vppcom + */ +static int +vcom_socket_check_ioctl_cmd (unsigned long int __cmd) +{ + int rc; + + switch (__cmd) + { + /* cmd handled by vppcom */ + case 
FIONREAD: + rc = 3; + break; + + /* cmd not handled by vcom and vppcom */ + default: + rc = 1; + break; + } + return rc; +} + +static inline int +vcom_session_ioctl_va (int __sid, int __cmd, va_list __ap) +{ + int rv; + + if (__cmd == FIONREAD) + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_NREAD, 0, 0); + else + rv = -EOPNOTSUPP; + return rv; +} + +int +vcom_socket_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap) +{ + int rv = -EBADF; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + switch (vcom_socket_check_ioctl_cmd (__cmd)) + { + /* Not supported cmd */ + case 0: + rv = -EOPNOTSUPP; + break; + + /* cmd not handled by vcom and vppcom */ + case 1: + rv = libc_vioctl (vsock->fd, __cmd, __ap); + break; + + /* cmd handled by vcom socket resource */ + case 2: + rv = libc_vioctl (vsock->fd, __cmd, __ap); + break; + + /* cmd handled by vppcom */ + case 3: + rv = vcom_session_ioctl_va (vsock->sid, __cmd, __ap); + break; + + default: + rv = -EINVAL; + break; + } + + return rv; +} + +static inline int +vcom_socket_fds_2_sid_fds ( + /* dest */ + int *vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds, + /* src */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_sid is -1 */ + int max_sid = -1; + int nsid = 0; + + /* + * set sid in sid sets corresponding to fd's in fd sets + * compute nsid and vcom_nsid_fds from sid sets + */ + + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, src + * S sid set, dest + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = 
vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + FD_SET (sid, (S)); \ + if (sid > max_sid) \ + { \ + max_sid = sid; \ + } \ + ++nsid; \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0; + rv = nsid; + +done: + return rv; +} + +/* + * PRE: 00. sid sets were derived from fd sets + * 01. sid sets were updated with sids that actually changed + * status + * 02. fd sets still has watched fds + * + * This function will modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ +static inline int +vcom_socket_sid_fds_2_fds ( + /* dest */ + int *new_vcom_nfds, + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + /* src */ + int vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* + * modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, dest + * S sid set, src + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + if (!FD_ISSET (sid, (S))) \ + { \ + FD_CLR(fd, (F)); \ + } \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + /* + * compute nfd and new_vcom_nfds from fd sets + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + 
max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(vcom_readfds); + _(vcom_writefds); + _(vcom_exceptfds); +#undef _ + + } + + *new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0; + rv = nfd; + +done: + return rv; +} + +/* + * PRE: + * vom_socket_select is always called with + * timeout->tv_sec and timeout->tv_usec set to zero. + * hence vppcom_select return immediately. + */ +/* + * TBD: do{body;} while(timeout conditional); timeout loop + */ +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + static unsigned long vcom_nsid_fds = 0; + int vcom_nsid = 0; + int rv = -EBADF; + pid_t pid = getpid (); + + int new_vcom_nfds = 0; + int new_vcom_nfd = 0; + + /* vcom sid fds */ + fd_set vcom_rd_sid_fds; + fd_set vcom_wr_sid_fds; + fd_set vcom_ex_sid_fds; + + /* in seconds eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + /* validate inputs */ + if (vcom_nfds < 0) + { + return -EINVAL; + } + + /* convert timeval timeout to double time_to_wait */ + if (timeout) + { + if (timeout->tv_sec == 0 && timeout->tv_usec == 0) + { + /* polling: vppcom_select returns immediately */ + time_to_wait = (double) 0; + } + else + { + /*TBD: use timeval api */ + /* whole seconds plus the microsecond fraction, counted once */ + time_to_wait = (double) timeout->tv_sec + + (double) timeout->tv_usec / (double) 1000000; + } + } + else + { + /* + * no timeout: vppcom_select can block indefinitely + * waiting for a file descriptor to become ready + * */ + /* set to a phantom value */ + time_to_wait = ~0; + } + + /* zero the sid_sets */ + /* + * F fd set + * S sid set + */ +#define _(S,F) \ + if ((F)) \ + { \ + FD_ZERO ((S)); \ + } + + + _(&vcom_rd_sid_fds, vcom_readfds); + _(&vcom_wr_sid_fds, vcom_writefds); + _(&vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + + if (vcom_nfds == 0) + { + if (time_to_wait > 0) + { + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] 
vcom_socket_select called to " + "emulate delay_ns()!\n", pid); + rv = vppcom_select (0, NULL, NULL, NULL, time_to_wait); + } + else + { + fprintf (stderr, "[%d] vcom_socket_select called vcom_nfds = 0 " + "and invalid time_to_wait (%f)!\n", pid, time_to_wait); + } + return 0; + } + + /* populate read, write and except sid_sets */ + vcom_nsid = vcom_socket_fds_2_sid_fds ( + /* dest */ + vcom_readfds || vcom_writefds + || vcom_exceptfds ? (int *) + &vcom_nsid_fds : NULL, + vcom_readfds ? &vcom_rd_sid_fds : + NULL, + vcom_writefds ? &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? &vcom_ex_sid_fds : + NULL, + /* src */ + vcom_nfds, + vcom_readfds, + vcom_writefds, vcom_exceptfds); + if (vcom_nsid < 0) + { + return vcom_nsid; + } + + rv = vppcom_select (vcom_nsid_fds, + vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds : + NULL, + vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? (unsigned long *) &vcom_ex_sid_fds : + NULL, time_to_wait); + if (VCOM_DEBUG > 2) + fprintf (stderr, "[%d] called vppcom_select(): " + "'%04d'='%04d'\n", pid, rv, (int) vcom_nsid_fds); + + /* check if any file descriptors changed status */ + if (rv > 0) + { + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + + /* + * comply with pre-condition + * do not clear vcom fd sets befor calling + * vcom_socket_sid_fds_2_fds + */ + new_vcom_nfd = vcom_socket_sid_fds_2_fds ( + /* dest */ + &new_vcom_nfds, + vcom_nfds, + vcom_readfds, + vcom_writefds, + vcom_exceptfds, + /* src */ + vcom_nsid_fds, + vcom_readfds ? + &vcom_rd_sid_fds : NULL, + vcom_writefds ? + &vcom_wr_sid_fds : NULL, + vcom_exceptfds ? 
+ &vcom_ex_sid_fds : NULL); + if (new_vcom_nfd < 0) + { + return new_vcom_nfd; + } + if (new_vcom_nfds < 0) + { + return -EINVAL; + } + rv = new_vcom_nfd; + } + return rv; +} + + +int +vcom_socket_socket (int __domain, int __type, int __protocol) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sid; + i32 sockidx; + u8 is_nonblocking = __type & SOCK_NONBLOCK ? 1 : 0; + int type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC); + + fd = vcom_socket_open_socket (__domain, __type, __protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + sid = vppcom_session_create (VPPCOM_VRF_DEFAULT, + (type == SOCK_DGRAM) ? + VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP, + is_nonblocking); + if (sid < 0) + { + rv = sid; + goto out_close_socket; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out_close_socket: + vcom_socket_close_socket (fd); +out: + return rv; +} + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]) +{ +/* TBD: */ + return 0; +} + +int +vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr) + { + return -EINVAL; + } + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port; + 
break; + + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_bind (vsock->sid, &ep); + return rv; +} + +static inline int +vcom_session_getsockname (int sid, vppcom_endpt_t * ep) +{ + int rv; + uint32_t size = sizeof (*ep); + + rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_LCL_ADDR, ep, &size); + return rv; +} + +int +vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vcom_session_getsockname (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? 
AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } + + return rv; +} + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_connect (vsock->sid, &ep); + } + return rv; +} + +static inline int +vcom_session_getpeername (int sid, vppcom_endpt_t * ep) +{ + int rv; + uint32_t size = sizeof (*ep); + + rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_PEER_ADDR, ep, &size); + return rv; +} + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct 
sockaddr_in *) __addr)->sin_addr; + rv = vcom_session_getpeername (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } + + return rv; +} + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + return vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0); +} + +ssize_t +vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + int rv = -1; + rv = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0); + return rv; +} + +/* + * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int +vcom_socket_is_connection_mode_socket (int __fd) +{ + int rv = -1; + /* TBD define new vppcom api */ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int type; + socklen_t optlen; + + p = hash_get (vsm->sockidx_by_fd, __fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + optlen = sizeof (type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &type, &optlen); + if (rv != 0) + { + return 0; + } + /* get socket type */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_SEQPACKET: + return 1; + break; + + default: + return 0; + break; + } + } + } + return 0; +} + +static inline ssize_t +vcom_session_sendto (int __sid, void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + /* TBD add new vpp api */ + /* TBD add flags parameter */ + rv = vppcom_session_write (__sid, (void *) __buf, (int) __n); + return rv; +} 
+ +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + if (!__buf) + { + return -EINVAL; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + { + return -EINVAL; + } + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__addr != NULL) || (__addr_len != 0)) + { + return -EISCONN; + } + } + else + { + if (!__addr) + { + return -EDESTADDRREQ; + } + /* not a vppcom supported address family: + * reject only when it is neither AF_INET nor AF_INET6 */ + if ((__addr->sa_family != AF_INET) && (__addr->sa_family != AF_INET6)) + { + return -EINVAL; + } + } + + rv = vcom_session_sendto (vsock->sid, (void *) __buf, (int) __n, + __flags, __addr, __addr_len); + return rv; +} + +static inline ssize_t +vcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + + /* TBD add flags parameter */ + rv = vppcom_session_read (__sid, __buf, __n); + return rv; +} + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + /* a NULL __addr is legal (vcom_socket_recv passes NULL, 0); + * __addr_len is only required when an address buffer is supplied */ + if (!__buf || (__addr && !__addr_len)) + { + return -EINVAL; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + { + return -EINVAL; + } + + rv = vcom_session_recvfrom (vsock->sid, __buf, __n, + __flags, __addr, 
__addr_len); + return rv; +} + +/* TBD: move it to vppcom */ +static inline ssize_t +vcom_session_sendmsg (int __sid, const struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_write (__sid, (void *) __message->__buf, + (int)__n); */ + return rv; +} + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__message->msg_name != NULL) || (__message->msg_namelen != 0)) + { + return -EISCONN; + } + } + else + { + /* TBD: validate __message->msg_name and __message->msg_namelen + * and return -EINVAL on validation error + * */ + ; + } + + rv = vcom_session_sendmsg (vsock->sid, __message, __flags); + + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static inline ssize_t +vcom_session_recvmsg (int __sid, struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_read (__sid, (void *) __message->__buf, + (int)__n); */ + rv = -EOPNOTSUPP; + return rv; +} + +ssize_t +vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__message) + { + return 
-EINVAL; + } + + /* validate __flags */ + + rv = vcom_session_recvmsg (vsock->sid, __message, __flags); + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo) +{ + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static inline int +vcom_session_get_sockopt (int __sid, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen) +{ + /* 1. for socket level options that are NOT socket attributes + * and that has corresponding vpp options get from vppcom */ + return 0; +} + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + /* both the value buffer and its length pointer are required */ + if (!__optval || !__optlen) + return -EFAULT; + + switch (__level) + { + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { +/* + * 1. for socket level options that are socket attributes, + * get from libc_getsockopt. + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * get from vppcom. + * 3. 
for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_KEEPALIVE: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + case SO_BINDTODEVICE: + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: +#ifdef SO_INCOMING_CPU + case SO_INCOMING_CPU: +#endif + rv = libc_getsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + /* 1. handle options that are NOT socket level options, + * but have corresponding vpp otions. 
*/ + rv = vcom_session_get_sockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + } + + return rv; +} + +/* TBD: move it to vppcom */ +static inline int +vcom_session_setsockopt (int __sid, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv = -EOPNOTSUPP; + + switch (__level) + { + case SOL_TCP: + switch (__optname) + { + case TCP_KEEPIDLE: + rv = + vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPIDLE, 0, 0); + break; + case TCP_KEEPINTVL: + rv = + vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPINTVL, 0, 0); + break; + default: + break; + } + break; + case SOL_IPV6: + switch (__optname) + { + case IPV6_V6ONLY: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_V6ONLY, 0, 0); + break; + default: + break; + } + break; + case SOL_SOCKET: + switch (__optname) + { + case SO_KEEPALIVE: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_KEEPALIVE, 0, 0); + break; + case SO_REUSEADDR: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_REUSEADDR, 0, 0); + break; + case SO_BROADCAST: + rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_BROADCAST, 0, 0); + break; + default: + break; + } + break; + default: + break; + } + + return rv; +} + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + /* + * Options without arguments + */ + + if (__optname == SO_BINDTODEVICE) + { + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + } + return rv; + } + + if (!__optval) + return -EFAULT; + + if (__optlen < sizeof (int)) + return -EINVAL; + + switch (__level) + { + case SOL_IPV6: + switch 
(__optname) + { + case IPV6_V6ONLY: + rv = vcom_session_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + default: + return -EOPNOTSUPP; + } + break; + case SOL_TCP: + switch (__optname) + { + case TCP_NODELAY: + return 0; + case TCP_KEEPIDLE: + case TCP_KEEPINTVL: + rv = vcom_session_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + default: + return -EOPNOTSUPP; + } + break; + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { + case SO_REUSEADDR: + case SO_BROADCAST: + case SO_KEEPALIVE: + rv = vcom_session_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + break; + + /* + * 1. for socket level options that are socket attributes, + * set it from libc_getsockopt + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * set it from vppcom + * 3. for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEPORT: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + /* + * SO_BINDTODEVICE already handled as + * "Options without arguments" */ + /* case SO_BINDTODEVICE: */ + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: +#ifdef SO_INCOMING_CPU + case SO_INCOMING_CPU: +#endif + rv = libc_setsockopt (__fd, __level, 
__optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + return -ENOPROTOOPT; + } + + return rv; +} + +int +vcom_socket_listen (int __fd, int __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + /* TBD vppcom to accept __n parameter */ + rv = vppcom_session_listen (vsock->sid, __n); + } + + return rv; +} + +static int +vcom_socket_connected_socket (int __fd, int __sid, + int *__domain, + int *__type, int *__protocol, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sockidx; + + socklen_t optlen; + + optlen = sizeof (*__domain); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__protocol); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, __sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out: + return rv; +} + +/* If flag is 0, then accept4() is the same as accept(). 
+ * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags + */ +static int +vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int fd; + int sid; + int domain; + int type; + int protocol; + + uint8_t addr8[sizeof (struct in6_addr)]; + vppcom_endpt_t ep; + + ep.ip = addr8; + + /* validate flags */ + + /* + * for documentation + * switch (flags) + * { + * case 0: + * case SOCK_NONBLOCK: + * case SOCK_CLOEXEC: + * case SOCK_NONBLOCK | SOCK_CLOEXEC: + * break; + * + * default: + * return -1; + * } + */ + /* flags can be 0 or can be bitwise OR + * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */ + + if (!(!flags || (flags & (SOCK_NONBLOCK | SOCK_CLOEXEC)))) + { + /* TBD: return proper error code */ + return -1; + } + + /* TBD: return proper error code */ + + if (!vcom_socket_is_connection_mode_socket (__fd)) + { + return -EOPNOTSUPP; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + + rv = vcom_fcntl (vsock->fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + /* socket is not marked as nonblocking + * and no pending connections are present + * on the queue, accept () blocks the caller + * until a connection is present. + */ + rv = vppcom_session_accept (vsock->sid, &ep, + -1.0 /* wait forever */ ); + } + else + { + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the accept would + * block. 
+ * */ + /* is non blocking */ + rv = vppcom_session_accept (vsock->sid, &ep, 0); + /* If the socket is marked nonblocking and + * no pending connections are present on the + * queue, accept fails with the error + * EAGAIN or EWOULDBLOCK + */ + if (rv == VPPCOM_ETIMEDOUT) + { + rv = VPPCOM_EAGAIN; + } + } + if (rv < 0) + { + return rv; + } + + sid = rv; + + /* create a new connected socket resource and set flags + * on the new file descriptor. + * update vsockets and sockidx_by_fd table + * */ + fd = vcom_socket_connected_socket (__fd, sid, + &domain, &type, &protocol, flags); + if (fd < 0) + { + return fd; + } + + rv = fd; + + /* TBD populate __addr and __addr_len */ + /* TBD: The returned address is truncated if the buffer + * provided is too small, in this case, __addr_len will + * return a value greater than was supplied to the call.*/ + if (__addr) + { + if (ep.is_cut_thru) + { + /* TBD populate __addr and __addr_len */ + switch (domain) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. 
+ __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + else + { + switch (ep.is_ip4) + { + case VPPCOM_IS_IP4: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case VPPCOM_IS_IP6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. + __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + } + else + { + /* when __addr is NULL, nothing is filled in, + * in this case, __addr_len is not used, + * and should also be null + * */ + if (__addr_len) + { + /* TBD: return proper error code */ + return -1; + } + } + } + + return rv; +} + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + /* set flags to 0 for accept() */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, 0); +} + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + /* SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags); +} +#endif + +/* TBD: move it to vppcom */ +static inline int +vcom_session_shutdown (int __fd, int __how) +{ + return 0; +} + +int +vcom_socket_shutdown (int __fd, int __how) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; 
+ + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + switch (__how) + { + case SHUT_RD: + case SHUT_WR: + case SHUT_RDWR: + rv = vcom_session_shutdown (vsock->sid, __how); + return rv; + break; + + default: + return -EINVAL; + break; + } + } + + return rv; +} + +int +vcom_socket_epoll_create1 (int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epoll_t *vepoll; + + i32 epfd; + i32 vep_idx; + i32 epollidx; + + epfd = vcom_socket_open_epoll (__flags); + if (epfd < 0) + { + rv = epfd; + goto out; + } + + vep_idx = vppcom_epoll_create (); + if (vep_idx < 0) + { + rv = vep_idx; + goto out_close_epoll; + } + + pool_get (vsm->vepolls, vepoll); + vepoll_init (vepoll); + + epollidx = vepoll - vsm->vepolls; + hash_set (vsm->epollidx_by_epfd, epfd, epollidx); + + vepoll_set (vepoll, epfd, vep_idx, EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0); + + return epfd; + +out_close_epoll: + vcom_socket_close_epoll (epfd); +out: + return rv; +} + +/* + * PRE: vppcom_epoll_ctl() is successful + * free_vepitem_on_del : 0 - no_pool_put, 1 - pool_put + */ +int +vcom_socket_ctl_vepitem (int __epfd, int __op, int __fd, + struct epoll_event *__event, + i32 vep_idx, vcom_epoll_t * vepoll, + i32 vfd_id, void *vfd, vcom_fd_type_t type, + int free_vepitem_on_del) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epitem_t *vepitem; + + vcom_epitem_key_t epfdfd = {.epfd = __epfd,.fd = __fd }; + uword *p; + i32 vepitemidx; + + i32 *vepitemidxs = 0; + + struct epoll_event revent = {.events = 0,.data.fd = INVALID_FD }; + + i32 vec_idx; + + /* perform control operations on the epoll instance */ + switch (__op) + { + case EPOLL_CTL_ADD: + /* + * supplied file descriptor is already + * registered with this epoll instance + * */ + /* vepitem exists */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (p) + { + rv = -EEXIST; + goto out; + } + + /* add a new vepitem */ + pool_get 
(vsm->vepitems, vepitem); + vepitem_init (vepitem); + + vepitemidx = vepitem - vsm->vepitems; + hash_set (vsm->epitemidx_by_epfdfd, epfdfd.key, vepitemidx); + vepitem_set (vepitem, __epfd, __fd, __fd, __fd, type, *__event, revent); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs, 0); + } + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_fd, __fd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_fd, __fd, vepitemidxs, 0); + } + + /* increment vepoll fd count by 1 */ + vepoll->count += 1; + + rv = 0; + goto out; + break; + + case EPOLL_CTL_MOD: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitem = pool_elt_at_index (vsm->vepitems, p[0]); + if (vepitem) + { + vepitem->event = *__event; + vepitem->revent = revent; + } + + rv = 0; + goto out; + break; + + case EPOLL_CTL_DEL: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitemidx = *(i32 *) p; + hash_unset (vsm->epitemidx_by_epfdfd, epfdfd.key); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + 
else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if (vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if (!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_epfd, __epfd); + } + } + } + + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if (vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if (!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_fd, __fd); + } + } + } + + /* pool put vepitem */ + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidx); + if (free_vepitem_on_del) + { + if (!vepitem) + { + rv = -ENOENT; + goto out; + } + vepitem_init (vepitem); + pool_put (vsm->vepitems, vepitem); + } + else + { + if (!vepitem) + { + vepitem_init (vepitem); + } + } + + /* decrement vepoll fd count by 1 */ + vepoll->count -= 1; + + rv = 0; + goto out; + break; + + default: + rv = -EINVAL; + goto out; + break; + } + +out: + return rv; +} + +/* + * PRE: 00. null pointer check on __event + * 01. 
all other parameters are validated + */ + +static int +vcom_socket_epoll_ctl_internal (int __epfd, int __op, int __fd, + struct epoll_event *__event, + int free_vepitem_on_del) +{ + int rv = -1; + + /* vcom_socket_main_t *vsm = &vcom_socket_main; */ + vcom_epoll_t *vepoll; + + /*__fd could could be vcom socket or vcom epoll or kernel fd */ + void *vfd; + vcom_epoll_t *vfd_vepoll; + vcom_socket_t *vfd_vsock; + + i32 vep_idx; + i32 vfd_id; + + vcom_fd_type_t type = FD_TYPE_INVALID; + + /* validate __event */ + + /* get vep_idx and vepoll */ + vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll); + if (vep_idx == INVALID_VEP_IDX) + { + return -EBADF; + } + + /* get vcom fd type, vfd_id and vfd */ + vfd_id = vcom_socket_get_sid_and_vsock (__fd, &vfd_vsock); + if (vfd_id != INVALID_SESSION_ID) + { + type = FD_TYPE_VCOM_SOCKET; + vfd = vfd_vsock; + } + else if ((vfd_id = vcom_socket_get_vep_idx_and_vepoll (__fd, &vfd_vepoll)) + != INVALID_VEP_IDX) + { + type = FD_TYPE_EPOLL; + vfd = vfd_vepoll; + } + else + { + /* FD_TYPE_KERNEL not supported by epoll instance */ + type = FD_TYPE_INVALID; + return -EBADF; + } + + + /* vepoll and vsock are now valid */ + rv = vppcom_epoll_ctl (vep_idx, __op, vfd_id, __event); + if (rv < 0) + { + return rv; + } + + rv = vcom_socket_ctl_vepitem (__epfd, __op, __fd, + __event, + vep_idx, vepoll, + vfd_id, vfd, type, free_vepitem_on_del); + return rv; +} + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 1); + return rv; +} + +static int +vcom_socket_epoll_ctl1 (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 0); + return rv; +} + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss) +{ + int rv = -EBADF; + + /* in seconds 
/*
 * Translate the result of a select()-style call back into poll()-style
 * revents.
 *
 * dest: __fds / __nfds - pollfd array; revents of every entry is rewritten
 *       nfd            - receives the number of entries with a non-negative
 *                        fd and a nonzero revents
 * src:  vcom_readfds / vcom_writefds / vcom_exceptfds - fd sets to read
 *       vcom_nfds      - accepted for symmetry, not used here
 *
 * Mapping:
 *   fd in exceptfds                   -> POLLERR (any select exception,
 *                                        whether requested or not)
 *   POLLIN  requested, fd in readfds  -> POLLIN
 *   POLLPRI requested, fd in readfds  -> POLLIN (select cannot tell them
 *                                        apart)
 *   POLLOUT requested, fd in writefds -> POLLOUT
 *   POLLRDNORM/POLLRDBAND/POLLWRNORM/POLLWRBAND likewise, when available.
 *
 * Entries with a negative fd get revents = 0 and are otherwise skipped;
 * they must never reach FD_ISSET (), which indexes the set by fd.
 * revents is cleared before bits are OR-ed in, so stale bits left by the
 * caller cannot be reported or counted.
 */
static inline void
vcom_selectfds_2_pollfds (
                           /* dest */
                           struct pollfd *__fds, nfds_t __nfds, int *nfd,
                           /* src */
                           int vcom_nfds,
                           fd_set * __restrict vcom_readfds,
                           fd_set * __restrict vcom_writefds,
                           fd_set * __restrict vcom_exceptfds)
{
  nfds_t fds_idx = 0;
  int ready = 0;

  (void) vcom_nfds;

  for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
    {
      /* revents is output only: start every entry from a clean slate */
      __fds[fds_idx].revents = 0;

      /* ignore negative fds */
      if (__fds[fds_idx].fd < 0)
        {
          continue;
        }

      /* for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL */
      if (FD_ISSET (__fds[fds_idx].fd, vcom_exceptfds))
        {
          /*
           * TBD: for now any select exception
           *      is flagged as POLLERR
           * */
          __fds[fds_idx].revents |= POLLERR;
        }

      /* requested events */
      if (__fds[fds_idx].events & POLLIN)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
            {
              __fds[fds_idx].revents |= POLLIN;
            }
        }
      if (__fds[fds_idx].events & POLLPRI)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
            {
              __fds[fds_idx].revents |= POLLIN;
            }
        }
      if (__fds[fds_idx].events & POLLOUT)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds))
            {
              __fds[fds_idx].revents |= POLLOUT;
            }
        }
#if defined __USE_XOPEN || defined __USE_XOPEN2K8
      if (__fds[fds_idx].events & POLLRDNORM)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
            {
              __fds[fds_idx].revents |= POLLRDNORM;
            }
        }
      if (__fds[fds_idx].events & POLLRDBAND)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
            {
              __fds[fds_idx].revents |= POLLRDBAND;
            }
        }
      if (__fds[fds_idx].events & POLLWRNORM)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds))
            {
              __fds[fds_idx].revents |= POLLWRNORM;
            }
        }
      if (__fds[fds_idx].events & POLLWRBAND)
        {
          if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds))
            {
              __fds[fds_idx].revents |= POLLWRBAND;
            }
        }
#endif

      /*
       * nfd:
       *     the number of structures which have nonzero revents fields
       *     (in other words, those descriptors with events or
       *     errors reported)
       * */
      if (__fds[fds_idx].revents)
        {
          ready++;
        }
    }                           /* for (fds_idx = 0; fds_idx < __nfds; fds_idx++) */

  *nfd = ready;
}
max_vcom_fd + 1 : 0; + + if (!vcom_nfds) + { + rv = vcom_nfds; + goto poll_done; + } + + vcom_pollfds_2_selectfds ( + /* src */ + __fds, __nfds, + /* dest */ + vcom_nfds, + &vcom_readfds, &vcom_writefds, &vcom_exceptfds); + + /* select on vcom fds */ + vcom_nfd = vcom_socket_select (vcom_nfds, + &vcom_readfds, + &vcom_writefds, &vcom_exceptfds, &tv); + if (VCOM_DEBUG > 2) + fprintf (stderr, + "[%d] vcom_socket_select: " + "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds); + + if (vcom_nfd < 0) + { + rv = vcom_nfd; + goto poll_done; + } + + vcom_selectfds_2_pollfds ( + /* dest */ + __fds, __nfds, &nfd, + /* src */ + vcom_nfds, + &vcom_readfds, &vcom_writefds, &vcom_exceptfds); + + rv = nfd; + +poll_done: + return rv; +} + +/* + * TBD: remove this static function once vppcom + * has an implementation in place + * + * ACTION: + */ +static int +vppcom_poll (struct pollfd *__fds, nfds_t __nfds, double time_to_wait) +{ + return -EOPNOTSUPP; +} + +int +vcom_socket_poll_vppcom_impl (struct pollfd *__fds, nfds_t __nfds, + int __timeout) +{ + nfds_t fds_idx = 0; + + /* in seconds eg. 
3.123456789 seconds */ + double time_to_wait = (double) 0; + + i32 sid; + i32 vep_idx; + + /* replace vcom fd with session idx */ + for (fds_idx = 0; fds_idx < __nfds; fds_idx++) + { + /* ignore negative fds */ + if (__fds[fds_idx].fd < 0) + { + continue; + } + + /* non negative validated vcom fds */ + sid = vcom_socket_get_sid (__fds[fds_idx].fd); + if (sid != INVALID_SESSION_ID) + { + __fds[fds_idx].fd = sid; + } + else + { + /* get vep_idx */ + vep_idx = vcom_socket_get_vep_idx (__fds[fds_idx].fd); + if (vep_idx != INVALID_VEP_IDX) + { + __fds[fds_idx].fd = vep_idx; + } + else + { + return -EBADF; + } + } + } + + /* validate __timeout */ + if (__timeout > 0) + { + time_to_wait = (double) __timeout / (double) 1000; + } + else if (__timeout == 0) + { + time_to_wait = (double) 0; + } + else if (__timeout < 0) + { + time_to_wait = ~0; + } + else + { + return -EBADF; + } + + return vppcom_poll (__fds, __nfds, time_to_wait); +} + +int +vcom_socket_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + /* select an implementation */ + + /* return vcom_socket_poll_vppcom_impl (__fds, __nfds, __timeout); */ + return vcom_socket_poll_select_impl (__fds, __nfds, __timeout); +} + +#ifdef __USE_GNU +int +vcom_socket_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + return -EOPNOTSUPP; +} +#endif + +int +vcom_socket_main_init (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_init\n"); + + if (!vsm->init) + { + /* TBD: define FD_MAXSIZE and use it here */ + pool_alloc (vsm->vsockets, FD_SETSIZE); + vsm->sockidx_by_fd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepolls, FD_SETSIZE); + vsm->epollidx_by_epfd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepitems, FD_SETSIZE); + vsm->epitemidx_by_epfdfd = hash_create (0, sizeof (i32)); + + vsm->epitemidxs_by_epfd = hash_create (0, sizeof (i32 *)); + vsm->epitemidxs_by_fd = hash_create (0, 
sizeof (i32 *)); + + vsm->init = 1; + } + + return 0; +} + + +void +vcom_socket_main_show (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs, *vepitemidxs_var; + + if (vsm->init) + { + /* from active list of vsockets show vsock */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + printf( + "fd='%04d', sid='%08x',type='%-30s'\n", + vsock->fd, vsock->sid, + vcom_socket_type_str (vsock->type)); + })); + /* *INDENT-ON* */ + + /* from active list of vepolls, show vepoll */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + printf( + "epfd='%04d', vep_idx='%08x', " + "type='%-30s', " + "flags='%d', count='%d', close='%d'\n", + vepoll->epfd, vepoll->vep_idx, + vcom_socket_epoll_type_str (vepoll->type), + vepoll->flags, vepoll->count, vepoll->close); + })); + /* *INDENT-ON* */ + + /* from active list of vepitems, show vepitem */ + + /* *INDENT-OFF* */ + pool_foreach (vepitem, vsm->vepitems, + ({ + printf( + "epfd='%04d', fd='%04d', " + "next_fd='%04d', prev_fd='%04d', " + "type='%-30s', " + "events='%04x', revents='%04x'\n", + vepitem->epfd, vepitem->fd, + vepitem->next_fd, vepitem->prev_fd, + vcom_socket_vcom_fd_type_str (vepitem->type), + vepitem->event.events, vepitem->revent.events); + })); + + /* *INDENT-ON* */ + + /* show epitemidxs for epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + printf("\n[ '%04d': ", epfd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("]\n"); + })); + /* *INDENT-ON* */ + + /* show epitemidxs for fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + printf("\n{ '%04d': ", fd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("}\n"); + })); + /* *INDENT-ON* */ + + } +} + +void 
+vcom_socket_main_destroy (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs; + + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_destroy\n"); + + if (vsm->init) + { + + /* + * from active list of vepitems, + * remove all "vepitem" elements from the pool in a safe way + * */ + + /* *INDENT-OFF* */ + pool_flush (vepitem, vsm->vepitems, + ({ + if (vepitem->type == FD_TYPE_EPOLL || FD_TYPE_VCOM_SOCKET) + { + vcom_socket_epoll_ctl1 (vepitem->epfd, EPOLL_CTL_DEL, + vepitem->fd, NULL); + vepitem_init (vepitem); + } + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepitems); + hash_free (vsm->epitemidx_by_epfdfd); + + /* free vepitemidxs for each epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_epfd); + + /* free vepitemidxs for each fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_fd); + + + /* + * from active list of vsockets, + * close socket and vppcom session + * */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vsock->sid); + vcom_socket_close_socket (vsock->fd); + vsocket_init (vsock); + } + })); + /* *INDENT-ON* */ + + /* + * return vsocket element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vsock, vsm->vsockets, + ({ + // vsocket_init(vsock); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vsockets); + hash_free (vsm->sockidx_by_fd); + + /* + * from active list of vepolls, + * close epoll and vppcom_epoll + * */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vepoll->vep_idx); + 
vcom_socket_close_epoll (vepoll->epfd); /* TBD: */ + vepoll_init (vepoll); + } + })); + /* *INDENT-ON* */ + + /* + * return vepoll element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vepoll, vsm->vepolls, + ({ + // vepoll_init(vepoll); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepolls); + hash_free (vsm->epollidx_by_epfd); + + vsm->init = 0; + } +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom_socket.h b/src/vcl/vcom_socket.h new file mode 100644 index 00000000000..4f5e4abfb15 --- /dev/null +++ b/src/vcl/vcom_socket.h @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Kind of file descriptor an epoll item refers to.
 *
 * FD_TYPE_INVALID is 0, so a zero-filled record reads as "invalid".
 * FD_TYPE_MAX is a sentinel, not a real type: it must stay the last
 * entry and new types are added directly above it.
 */
typedef enum
{
  FD_TYPE_INVALID = 0,          /* unset / unknown */
  FD_TYPE_KERNEL = 1,           /* descriptor owned by the kernel */
  FD_TYPE_EPOLL = 2,            /* vcom epoll descriptor */
  FD_TYPE_VCOM_SOCKET = 3,      /* vcom socket descriptor */
  /* add new types here */
  /* FD_TYPE_MAX should be the last entry */
  FD_TYPE_MAX
} vcom_fd_type_t;
*/ + u32 close; + +} vcom_epoll_t; + +typedef struct +{ + /* "container" of this item */ + i32 epfd; + + /* fd - file descriptor information this item refers to */ + i32 fd; + /* next and prev fd in the "epoll set" of epfd */ + i32 next_fd; + i32 prev_fd; + + /* vcom fd type */ + vcom_fd_type_t type; + + /* interested events and the source fd */ + struct epoll_event event; + + /* ready events and the source fd */ + struct epoll_event revent; + + /* epitem attributes here */ + +} vcom_epitem_t; + +typedef union vcom_epitem_key +{ + struct + { + i32 fd; + i32 epfd; + }; + i64 key; +} __EPOLL_PACKED vcom_epitem_key_t; + +static inline char * +vcom_socket_type_str (vcom_socket_type_t t) +{ + switch (t) + { + case SOCKET_TYPE_UNBOUND: + return "SOCKET_TYPE_UNBOUND"; + + case SOCKET_TYPE_KERNEL_BOUND: + return "SOCKET_TYPE_KERNEL_BOUND"; + + case SOCKET_TYPE_VPPCOM_BOUND: + return "SOCKET_TYPE_VPPCOM_BOUND"; + + default: + return "SOCKET_TYPE_UNKNOWN"; + } +} + +static inline char * +vcom_socket_epoll_type_str (vcom_epoll_type_t t) +{ + switch (t) + { + case EPOLL_TYPE_UNBOUND: + return "EPOLL_TYPE_UNBOUND"; + + case EPOLL_TYPE_KERNEL_BOUND: + return "EPOLL_TYPE_KERNEL_BOUND"; + + case EPOLL_TYPE_VPPCOM_BOUND: + return "EPOLL_TYPE_VPPCOM_BOUND"; + + default: + return "EPOLL_TYPE_UNKNOWN"; + } +} + +static inline char * +vcom_socket_vcom_fd_type_str (vcom_fd_type_t t) +{ + switch (t) + { + case FD_TYPE_KERNEL: + return "FD_TYPE_KERNEL"; + + case FD_TYPE_EPOLL: + return "FD_TYPE_EPOLL"; + + case FD_TYPE_VCOM_SOCKET: + return "FD_TYPE_VCOM_SOCKET"; + + default: + return "FD_TYPE_UNKNOWN"; + } +} + +static inline int +vcom_socket_type_is_vppcom_bound (vcom_socket_type_t t) +{ + return t == SOCKET_TYPE_VPPCOM_BOUND; +} + +static inline int +vcom_socket_epoll_type_is_vppcom_bound (vcom_epoll_type_t t) +{ + return t == EPOLL_TYPE_VPPCOM_BOUND; +} + +static inline void +vsocket_init (vcom_socket_t * vsock) +{ + memset (vsock, 0, sizeof (*vsock)); + + vsock->fd = INVALID_FD; + 
vsock->sid = INVALID_SESSION_ID; + vsock->type = SOCKET_TYPE_UNBOUND; + /* vcom socket attributes init here */ +} + +static inline void +vepoll_init (vcom_epoll_t * vepoll) +{ + memset (vepoll, 0, sizeof (*vepoll)); + + vepoll->epfd = INVALID_EPFD; + vepoll->vep_idx = INVALID_VEP_IDX; + vepoll->type = EPOLL_TYPE_UNBOUND; + vepoll->flags = 0; + + vepoll->count = 0; + vepoll->close = 0; + /* vcom epoll attributes init here */ +} + +static inline void +vepitem_init (vcom_epitem_t * vepitem) +{ + struct epoll_event event = {.events = 0,.data.fd = INVALID_FD }; + + memset (vepitem, 0, sizeof (*vepitem)); + + vepitem->epfd = INVALID_EPFD; + + vepitem->fd = INVALID_FD; + vepitem->next_fd = INVALID_FD; + vepitem->prev_fd = INVALID_FD; + + vepitem->type = FD_TYPE_INVALID; + + vepitem->event = event; + vepitem->revent = event; + /* vepoll attributes init here */ +} + +static inline void +vepitemkey_init (vcom_epitem_key_t * epfdfd) +{ + memset (epfdfd, 0, sizeof (*epfdfd)); + + epfdfd->epfd = INVALID_EPFD; + epfdfd->fd = INVALID_FD; +} + +static inline void +vsocket_set (vcom_socket_t * vsock, i32 fd, i32 sid, vcom_socket_type_t type) +{ + vsock->fd = fd; + vsock->sid = sid; + vsock->type = type; + /* vcom socket attributes set here */ +} + +static inline void +vepoll_set (vcom_epoll_t * vepoll, + i32 epfd, i32 vep_idx, + vcom_epoll_type_t type, i32 flags, i32 count, u32 close) +{ + vepoll->epfd = epfd; + vepoll->vep_idx = vep_idx; + vepoll->type = type; + vepoll->flags = flags; + + vepoll->count = count; + vepoll->close = close; + /* vcom epoll attributes set here */ +} + +static inline void +vepitem_set (vcom_epitem_t * vepitem, + i32 epfd, + i32 fd, i32 next_fd, i32 prev_fd, + vcom_fd_type_t type, + struct epoll_event event, struct epoll_event revent) +{ + vepitem->epfd = epfd; + + vepitem->fd = fd; + vepitem->next_fd = next_fd; + vepitem->prev_fd = prev_fd; + + vepitem->type = type; + + vepitem->event = event; + vepitem->revent = revent; + /* vcom epitem attributes set 
here */ +} + +static inline void +vepitemkey_set (vcom_epitem_key_t * epfdfd, i32 epfd, i32 fd) +{ + epfdfd->epfd = epfd; + epfdfd->fd = fd; +} + +static inline int +vsocket_is_vppcom_bound (vcom_socket_t * vsock) +{ + return vcom_socket_type_is_vppcom_bound (vsock->type); +} + +static inline int +vepoll_is_vppcom_bound (vcom_epoll_t * vepoll) +{ + return vcom_socket_epoll_type_is_vppcom_bound (vepoll->type); +} + +int vcom_socket_main_init (void); + +void vcom_socket_main_destroy (void); + +void vcom_socket_main_show (void); + +int vcom_socket_is_vcom_fd (int fd); + +int vcom_socket_is_vcom_epfd (int epfd); + +int vcom_socket_close (int __fd); + +ssize_t vcom_socket_read (int __fd, void *__buf, size_t __nbytes); + +ssize_t vcom_socket_readv (int __fd, const struct iovec *__iov, int __iovcnt); + +ssize_t vcom_socket_write (int __fd, const void *__buf, size_t __n); + +ssize_t vcom_socket_writev (int __fd, const struct iovec *__iov, + int __iovcnt); + +int vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap); + +int vcom_socket_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap); + +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout); + + +int vcom_socket_socket (int __domain, int __type, int __protocol); + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]); + +int vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +int +vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags); + +ssize_t vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags); + +/* + * 
RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int vcom_socket_is_connection_mode_socket (int __fd); + +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len); + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +ssize_t vcom_socket_recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo); +#endif + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen); + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +int vcom_socket_listen (int __fd, int __n); + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +int vcom_socket_shutdown (int __fd, int __how); + +int vcom_socket_epoll_create1 (int __flags); + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss); + +/* + * handle only vcom fds + */ +int vcom_socket_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +int +vcom_socket_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const 
__sigset_t * __ss); +#endif + +#endif /* included_vcom_socket_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom_socket_wrapper.c b/src/vcl/vcom_socket_wrapper.c new file mode 100644 index 00000000000..19a775de99e --- /dev/null +++ b/src/vcl/vcom_socket_wrapper.c @@ -0,0 +1,906 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org> + * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org> + * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + Socket wrapper library. Passes all socket communication over + unix domain sockets if the environment variable SOCKET_WRAPPER_DIR + is set. +*/ + +#include <signal.h> +#include <dlfcn.h> + +#include <stdio.h> +#include <stdarg.h> +#include <unistd.h> +#include <pthread.h> + +#include <vcl/vcom_socket_wrapper.h> + + +enum swrap_dbglvl_e +{ + SWRAP_LOG_ERROR = 0, + SWRAP_LOG_WARN, + SWRAP_LOG_DEBUG, + SWRAP_LOG_TRACE +}; + + +/* Macros for accessing mutexes */ +#define SWRAP_LOCK(m) do { \ + pthread_mutex_lock(&(m ## _mutex)); \ +} while(0) + +#define SWRAP_UNLOCK(m) do { \ + pthread_mutex_unlock(&(m ## _mutex)); \ +} while(0) + +/* Add new global locks here please */ +#define SWRAP_LOCK_ALL \ + SWRAP_LOCK(libc_symbol_binding); \ + +#define SWRAP_UNLOCK_ALL \ + SWRAP_UNLOCK(libc_symbol_binding); \ + + + +/* The mutex for accessing the global libc.symbols */ +static pthread_mutex_t libc_symbol_binding_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Function prototypes */ + +#ifdef NDEBUG +#define SWRAP_LOG(...) +#else +static void +swrap_log (enum swrap_dbglvl_e dbglvl, const char *func, + const char *format, ...) +PRINTF_ATTRIBUTE (3, 4); +#define SWRAP_LOG(dbglvl, ...) 
swrap_log((dbglvl), __func__, __VA_ARGS__) + + static void + swrap_log (enum swrap_dbglvl_e dbglvl, + const char *func, const char *format, ...) +{ + char buffer[1024]; + va_list va; + unsigned int lvl = SWRAP_LOG_WARN; + + va_start (va, format); + vsnprintf (buffer, sizeof (buffer), format, va); + va_end (va); + + if (lvl >= dbglvl) + { + switch (dbglvl) + { + case SWRAP_LOG_ERROR: + fprintf (stderr, + "SWRAP_ERROR(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_WARN: + fprintf (stderr, + "SWRAP_WARN(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_DEBUG: + fprintf (stderr, + "SWRAP_DEBUG(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_TRACE: + fprintf (stderr, + "SWRAP_TRACE(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + } + } +} +#endif + + +/********************************************************* + * SWRAP LOADING LIBC FUNCTIONS + *********************************************************/ + +#ifdef HAVE_ACCEPT4 +typedef int (*__libc_accept4) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen, int flags); +#else +typedef int (*__libc_accept) (int sockfd, + struct sockaddr * addr, socklen_t * addrlen); +#endif +typedef int (*__libc_bind) (int sockfd, + const struct sockaddr * addr, socklen_t addrlen); +typedef int (*__libc_close) (int fd); +typedef int (*__libc_connect) (int sockfd, + const struct sockaddr * addr, + socklen_t addrlen); + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +typedef int (*__libc_dup) (int fd); +typedef int (*__libc_dup2) (int oldfd, int newfd); +#endif + +typedef int (*__libc_fcntl) (int fd, int cmd, ...); +typedef FILE *(*__libc_fopen) (const char *name, const char *mode); +#ifdef HAVE_FOPEN64 +typedef FILE *(*__libc_fopen64) (const char *name, const char *mode); +#endif +#ifdef HAVE_EVENTFD +typedef int (*__libc_eventfd) (int count, int flags); +#endif +typedef int (*__libc_getpeername) (int sockfd, + struct 
sockaddr * addr, + socklen_t * addrlen); +typedef int (*__libc_getsockname) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen); +typedef int (*__libc_getsockopt) (int sockfd, + int level, + int optname, + void *optval, socklen_t * optlen); +typedef int (*__libc_ioctl) (int d, unsigned long int request, ...); +typedef int (*__libc_listen) (int sockfd, int backlog); +typedef int (*__libc_open) (const char *pathname, int flags, mode_t mode); +#ifdef HAVE_OPEN64 +typedef int (*__libc_open64) (const char *pathname, int flags, mode_t mode); +#endif /* HAVE_OPEN64 */ +typedef int (*__libc_openat) (int dirfd, const char *path, int flags, ...); +typedef int (*__libc_pipe) (int pipefd[2]); +typedef int (*__libc_read) (int fd, void *buf, size_t count); +typedef ssize_t (*__libc_readv) (int fd, const struct iovec * iov, + int iovcnt); +typedef int (*__libc_recv) (int sockfd, void *buf, size_t len, int flags); +typedef int (*__libc_recvfrom) (int sockfd, + void *buf, + size_t len, + int flags, + struct sockaddr * src_addr, + socklen_t * addrlen); +typedef int (*__libc_recvmsg) (int sockfd, const struct msghdr * msg, + int flags); +typedef int (*__libc_send) (int sockfd, const void *buf, size_t len, + int flags); +typedef int (*__libc_sendmsg) (int sockfd, const struct msghdr * msg, + int flags); +typedef int (*__libc_sendto) (int sockfd, const void *buf, size_t len, + int flags, const struct sockaddr * dst_addr, + socklen_t addrlen); +typedef int (*__libc_setsockopt) (int sockfd, int level, int optname, + const void *optval, socklen_t optlen); +#ifdef HAVE_SIGNALFD +typedef int (*__libc_signalfd) (int fd, const sigset_t * mask, int flags); +#endif +typedef int (*__libc_socket) (int domain, int type, int protocol); +typedef int (*__libc_socketpair) (int domain, int type, int protocol, + int sv[2]); +#ifdef HAVE_TIMERFD_CREATE +typedef int (*__libc_timerfd_create) (int clockid, int flags); +#endif +typedef ssize_t (*__libc_write) (int fd, const void *buf, size_t 
count); +typedef ssize_t (*__libc_writev) (int fd, const struct iovec * iov, + int iovcnt); + +typedef int (*__libc_shutdown) (int fd, int how); + +typedef int (*__libc_select) (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval * __restrict __timeout); + +#ifdef __USE_XOPEN2K +typedef int (*__libc_pselect) (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec * __restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +typedef int (*__libc_epoll_create) (int __size); + +typedef int (*__libc_epoll_create1) (int __flags); + +typedef int (*__libc_epoll_ctl) (int __epfd, int __op, int __fd, + struct epoll_event * __event); + +typedef int (*__libc_epoll_wait) (int __epfd, struct epoll_event * __events, + int __maxevents, int __timeout); + +typedef int (*__libc_epoll_pwait) (int __epfd, struct epoll_event * __events, + int __maxevents, int __timeout, + const __sigset_t * __ss); + +typedef int (*__libc_poll) (struct pollfd * __fds, nfds_t __nfds, + int __timeout); + +#ifdef __USE_GNU +typedef int (*__libc_ppoll) (struct pollfd * __fds, nfds_t __nfds, + const struct timespec * __timeout, + const __sigset_t * __ss); +#endif + + +#define SWRAP_SYMBOL_ENTRY(i) \ + union { \ + __libc_##i f; \ + void *obj; \ + } _libc_##i + +struct swrap_libc_symbols +{ +#ifdef HAVE_ACCEPT4 + SWRAP_SYMBOL_ENTRY (accept4); +#else + SWRAP_SYMBOL_ENTRY (accept); +#endif + SWRAP_SYMBOL_ENTRY (bind); + SWRAP_SYMBOL_ENTRY (close); + SWRAP_SYMBOL_ENTRY (connect); +#if 0 + /* TBD: dup and dup2 to be implemented later */ + SWRAP_SYMBOL_ENTRY (dup); + SWRAP_SYMBOL_ENTRY (dup2); +#endif + SWRAP_SYMBOL_ENTRY (fcntl); + SWRAP_SYMBOL_ENTRY (fopen); +#ifdef HAVE_FOPEN64 + SWRAP_SYMBOL_ENTRY (fopen64); +#endif +#ifdef HAVE_EVENTFD + SWRAP_SYMBOL_ENTRY (eventfd); +#endif + SWRAP_SYMBOL_ENTRY (getpeername); + SWRAP_SYMBOL_ENTRY 
(getsockname); + SWRAP_SYMBOL_ENTRY (getsockopt); + SWRAP_SYMBOL_ENTRY (ioctl); + SWRAP_SYMBOL_ENTRY (listen); + SWRAP_SYMBOL_ENTRY (open); +#ifdef HAVE_OPEN64 + SWRAP_SYMBOL_ENTRY (open64); +#endif + SWRAP_SYMBOL_ENTRY (openat); + SWRAP_SYMBOL_ENTRY (pipe); + SWRAP_SYMBOL_ENTRY (read); + SWRAP_SYMBOL_ENTRY (readv); + SWRAP_SYMBOL_ENTRY (recv); + SWRAP_SYMBOL_ENTRY (recvfrom); + SWRAP_SYMBOL_ENTRY (recvmsg); + SWRAP_SYMBOL_ENTRY (send); + SWRAP_SYMBOL_ENTRY (sendmsg); + SWRAP_SYMBOL_ENTRY (sendto); + SWRAP_SYMBOL_ENTRY (setsockopt); +#ifdef HAVE_SIGNALFD + SWRAP_SYMBOL_ENTRY (signalfd); +#endif + SWRAP_SYMBOL_ENTRY (socket); + SWRAP_SYMBOL_ENTRY (socketpair); +#ifdef HAVE_TIMERFD_CREATE + SWRAP_SYMBOL_ENTRY (timerfd_create); +#endif + SWRAP_SYMBOL_ENTRY (write); + SWRAP_SYMBOL_ENTRY (writev); + + SWRAP_SYMBOL_ENTRY (shutdown); + SWRAP_SYMBOL_ENTRY (select); +#ifdef __USE_XOPEN2K + SWRAP_SYMBOL_ENTRY (pselect); +#endif + SWRAP_SYMBOL_ENTRY (epoll_create); + SWRAP_SYMBOL_ENTRY (epoll_create1); + SWRAP_SYMBOL_ENTRY (epoll_ctl); + SWRAP_SYMBOL_ENTRY (epoll_wait); + SWRAP_SYMBOL_ENTRY (epoll_pwait); + SWRAP_SYMBOL_ENTRY (poll); +#ifdef __USE_GNU + SWRAP_SYMBOL_ENTRY (ppoll); +#endif +}; + +struct swrap +{ + struct + { + void *handle; + void *socket_handle; + struct swrap_libc_symbols symbols; + } libc; +}; + +static struct swrap swrap; + +#define LIBC_NAME "libc.so" + +enum swrap_lib +{ + SWRAP_LIBC, +}; + +#ifndef NDEBUG +static const char * +swrap_str_lib (enum swrap_lib lib) +{ + switch (lib) + { + case SWRAP_LIBC: + return "libc"; + } + + /* Compiler would warn us about unhandled enum value if we get here */ + return "unknown"; +} +#endif + +static void * +swrap_load_lib_handle (enum swrap_lib lib) +{ + int flags = RTLD_LAZY; + void *handle = NULL; + int i; + +#ifdef RTLD_DEEPBIND + flags |= RTLD_DEEPBIND; +#endif + + switch (lib) + { + case SWRAP_LIBC: + handle = swrap.libc.handle; +#ifdef LIBC_SO + if (handle == NULL) + { + handle = dlopen (LIBC_SO, flags); + + 
swrap.libc.handle = handle; + } +#endif + if (handle == NULL) + { + for (i = 10; i >= 0; i--) + { + char soname[256] = { 0 }; + + snprintf (soname, sizeof (soname), "libc.so.%d", i); + handle = dlopen (soname, flags); + if (handle != NULL) + { + break; + } + } + + swrap.libc.handle = handle; + } + break; + } + + if (handle == NULL) + { + SWRAP_LOG (SWRAP_LOG_ERROR, + "Failed to dlopen library: %s\n", dlerror ()); + exit (-1); + } + + return handle; +} + +static void * +_swrap_bind_symbol (enum swrap_lib lib, const char *fn_name) +{ + void *handle; + void *func; + + handle = swrap_load_lib_handle (lib); + + func = dlsym (handle, fn_name); + if (func == NULL) + { + SWRAP_LOG (SWRAP_LOG_ERROR, + "Failed to find %s: %s\n", fn_name, dlerror ()); + exit (-1); + } + + SWRAP_LOG (SWRAP_LOG_TRACE, + "Loaded %s from %s", fn_name, swrap_str_lib (lib)); + + return func; +} + +#define swrap_bind_symbol_libc(sym_name) \ + SWRAP_LOCK(libc_symbol_binding); \ + if (swrap.libc.symbols._libc_##sym_name.obj == NULL) { \ + swrap.libc.symbols._libc_##sym_name.obj = \ + _swrap_bind_symbol(SWRAP_LIBC, #sym_name); \ + } \ + SWRAP_UNLOCK(libc_symbol_binding) + +/* + * IMPORTANT + * + * Functions especially from libc need to be loaded individually, you can't load + * all at once or gdb will segfault at startup. The same applies to valgrind and + * probably has something to do with the linker. + * So we need to load each function at the point it is called for the first time. 
+ */ +#ifdef HAVE_ACCEPT4 +int +libc_accept4 (int sockfd, + struct sockaddr *addr, socklen_t * addrlen, int flags) +{ + swrap_bind_symbol_libc (accept4); + + return swrap.libc.symbols._libc_accept4.f (sockfd, addr, addrlen, flags); +} + +#else /* HAVE_ACCEPT4 */ + +int +libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (accept); + + return swrap.libc.symbols._libc_accept.f (sockfd, addr, addrlen); +} +#endif /* HAVE_ACCEPT4 */ + +int +libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (bind); + + return swrap.libc.symbols._libc_bind.f (sockfd, addr, addrlen); +} + +int +libc_close (int fd) +{ + swrap_bind_symbol_libc (close); + + return swrap.libc.symbols._libc_close.f (fd); +} + +int +libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (connect); + + return swrap.libc.symbols._libc_connect.f (sockfd, addr, addrlen); +} + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +int +libc_dup (int fd) +{ + swrap_bind_symbol_libc (dup); + + return swrap.libc.symbols._libc_dup.f (fd); +} + +int +libc_dup2 (int oldfd, int newfd) +{ + swrap_bind_symbol_libc (dup2); + + return swrap.libc.symbols._libc_dup2.f (oldfd, newfd); +} +#endif + +#ifdef HAVE_EVENTFD +int +libc_eventfd (int count, int flags) +{ + swrap_bind_symbol_libc (eventfd); + + return swrap.libc.symbols._libc_eventfd.f (count, flags); +} +#endif + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vfcntl (int fd, int cmd, va_list ap) +{ + long int args[4]; + int rc; + int i; + + swrap_bind_symbol_libc (fcntl); + + for (i = 0; i < 4; i++) + { + args[i] = va_arg (ap, long int); + } + + rc = swrap.libc.symbols._libc_fcntl.f (fd, + cmd, + args[0], args[1], args[2], args[3]); + + return rc; +} + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vioctl (int fd, int cmd, va_list ap) +{ + long int args[4]; + int rc; + int i; + + swrap_bind_symbol_libc (ioctl); + + for (i = 0; i < 4; 
i++) + { + args[i] = va_arg (ap, long int); + } + + rc = swrap.libc.symbols._libc_ioctl.f (fd, + cmd, + args[0], args[1], args[2], args[3]); + + return rc; +} + +int +libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (getpeername); + + return swrap.libc.symbols._libc_getpeername.f (sockfd, addr, addrlen); +} + +int +libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (getsockname); + + return swrap.libc.symbols._libc_getsockname.f (sockfd, addr, addrlen); +} + +int +libc_getsockopt (int sockfd, + int level, int optname, void *optval, socklen_t * optlen) +{ + swrap_bind_symbol_libc (getsockopt); + + return swrap.libc.symbols._libc_getsockopt.f (sockfd, + level, + optname, optval, optlen); +} + +int +libc_listen (int sockfd, int backlog) +{ + swrap_bind_symbol_libc (listen); + + return swrap.libc.symbols._libc_listen.f (sockfd, backlog); +} + +int +libc_read (int fd, void *buf, size_t count) +{ + swrap_bind_symbol_libc (read); + + return swrap.libc.symbols._libc_read.f (fd, buf, count); +} + +ssize_t +libc_readv (int fd, const struct iovec * iov, int iovcnt) +{ + swrap_bind_symbol_libc (readv); + + return swrap.libc.symbols._libc_readv.f (fd, iov, iovcnt); +} + +int +libc_recv (int sockfd, void *buf, size_t len, int flags) +{ + swrap_bind_symbol_libc (recv); + + return swrap.libc.symbols._libc_recv.f (sockfd, buf, len, flags); +} + +int +libc_recvfrom (int sockfd, + void *buf, + size_t len, + int flags, struct sockaddr *src_addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (recvfrom); + + return swrap.libc.symbols._libc_recvfrom.f (sockfd, + buf, + len, flags, src_addr, addrlen); +} + +int +libc_recvmsg (int sockfd, struct msghdr *msg, int flags) +{ + swrap_bind_symbol_libc (recvmsg); + + return swrap.libc.symbols._libc_recvmsg.f (sockfd, msg, flags); +} + +int +libc_send (int sockfd, const void *buf, size_t len, int flags) +{ + swrap_bind_symbol_libc (send); + 
+ return swrap.libc.symbols._libc_send.f (sockfd, buf, len, flags); +} + +int +libc_sendmsg (int sockfd, const struct msghdr *msg, int flags) +{ + swrap_bind_symbol_libc (sendmsg); + + return swrap.libc.symbols._libc_sendmsg.f (sockfd, msg, flags); +} + +int +libc_sendto (int sockfd, + const void *buf, + size_t len, + int flags, const struct sockaddr *dst_addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (sendto); + + return swrap.libc.symbols._libc_sendto.f (sockfd, + buf, + len, flags, dst_addr, addrlen); +} + +int +libc_setsockopt (int sockfd, + int level, int optname, const void *optval, socklen_t optlen) +{ + swrap_bind_symbol_libc (setsockopt); + + return swrap.libc.symbols._libc_setsockopt.f (sockfd, + level, + optname, optval, optlen); +} + +int +libc_socket (int domain, int type, int protocol) +{ + swrap_bind_symbol_libc (socket); + + return swrap.libc.symbols._libc_socket.f (domain, type, protocol); +} + +int +libc_socketpair (int domain, int type, int protocol, int sv[2]) +{ + swrap_bind_symbol_libc (socketpair); + + return swrap.libc.symbols._libc_socketpair.f (domain, type, protocol, sv); +} + +ssize_t +libc_write (int fd, const void *buf, size_t count) +{ + swrap_bind_symbol_libc (write); + + return swrap.libc.symbols._libc_write.f (fd, buf, count); +} + +ssize_t +libc_writev (int fd, const struct iovec * iov, int iovcnt) +{ + swrap_bind_symbol_libc (writev); + + return swrap.libc.symbols._libc_writev.f (fd, iov, iovcnt); +} + +int +libc_shutdown (int fd, int how) +{ + swrap_bind_symbol_libc (shutdown); + + return swrap.libc.symbols._libc_shutdown.f (fd, how); +} + +int +libc_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + swrap_bind_symbol_libc (select); + + return swrap.libc.symbols._libc_select.f (__nfds, __readfds, + __writefds, + __exceptfds, __timeout); +} + +#ifdef __USE_XOPEN2K +int +libc_pselect (int __nfds, fd_set * 
__restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + swrap_bind_symbol_libc (pselect); + + return swrap.libc.symbols._libc_pselect.f (__nfds, __readfds, + __writefds, + __exceptfds, + __timeout, __sigmask); +} +#endif + +int +libc_epoll_create (int __size) +{ + swrap_bind_symbol_libc (epoll_create); + + return swrap.libc.symbols._libc_epoll_create.f (__size); +} + +int +libc_epoll_create1 (int __flags) +{ + swrap_bind_symbol_libc (epoll_create1); + + return swrap.libc.symbols._libc_epoll_create1.f (__flags); +} + +int +libc_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event) +{ + swrap_bind_symbol_libc (epoll_ctl); + + return swrap.libc.symbols._libc_epoll_ctl.f (__epfd, __op, __fd, __event); +} + +int +libc_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + swrap_bind_symbol_libc (epoll_wait); + + return swrap.libc.symbols._libc_epoll_wait.f (__epfd, __events, + __maxevents, __timeout); +} + +int +libc_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss) +{ + swrap_bind_symbol_libc (epoll_pwait); + + return swrap.libc.symbols._libc_epoll_pwait.f (__epfd, __events, + __maxevents, __timeout, + __ss); +} + +int +libc_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + swrap_bind_symbol_libc (poll); + + return swrap.libc.symbols._libc_poll.f (__fds, __nfds, __timeout); +} + +#ifdef __USE_GNU +int +libc_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + swrap_bind_symbol_libc (ppoll); + + return swrap.libc.symbols._libc_ppoll.f (__fds, __nfds, __timeout, __ss); +} +#endif + +static void +swrap_thread_prepare (void) +{ + SWRAP_LOCK_ALL; +} + +static void +swrap_thread_parent (void) +{ + SWRAP_UNLOCK_ALL; +} + +static void +swrap_thread_child (void) 
+{ + SWRAP_UNLOCK_ALL; +} + +/**************************** + * CONSTRUCTOR + ***************************/ +void +swrap_constructor (void) +{ + /* + * If we hold a lock and the application forks, then the child + * is not able to unlock the mutex and we are in a deadlock. + * This should prevent such deadlocks. + */ + pthread_atfork (&swrap_thread_prepare, + &swrap_thread_parent, &swrap_thread_child); +} + +/**************************** + * DESTRUCTOR + ***************************/ + +/* + * Intended to run when the library is unloaded: closes the library handles + * recorded in swrap.libc (handle and socket_handle) with dlclose(), if set. + */ +void +swrap_destructor (void) +{ + if (swrap.libc.handle != NULL) + { + dlclose (swrap.libc.handle); + } + if (swrap.libc.socket_handle) + { + dlclose (swrap.libc.socket_handle); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcom_socket_wrapper.h b/src/vcl/vcom_socket_wrapper.h new file mode 100644 index 00000000000..9e85ecf2b6c --- /dev/null +++ b/src/vcl/vcom_socket_wrapper.h @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org> + * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org> + * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org> + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + Socket wrapper library. Passes all socket communication over + unix domain sockets if the environment variable SOCKET_WRAPPER_DIR + is set. +*/ + +#ifndef included_vcom_socket_wrapper_h +#define included_vcom_socket_wrapper_h + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <sys/epoll.h> +#include <poll.h> +#include <sys/uio.h> +#include <stdlib.h> + + +/* GCC have printf type attribute check. 
*/ +#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT +#define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b))) +#else +#define PRINTF_ATTRIBUTE(a,b) +#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */ +/* HAVE_CONSTRUCTOR_ATTRIBUTE is defined unconditionally here, so the empty #else fallback is never selected. */ +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ +/* HAVE_DESTRUCTOR_ATTRIBUTE is likewise defined unconditionally just below. */ +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif +/* HAVE_ADDRESS_SANITIZER_ATTRIBUTE is likewise defined unconditionally just below. */ +#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address)) +#else +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE +#endif + +/* + * IMPORTANT + * + * Functions especially from libc need to be loaded individually, you can't load + * all at once or gdb will segfault at startup. The same applies to valgrind and + * probably has something to do with the linker. + * So we need to load each function at the point it is called for the first time. 
+ */ +#ifdef HAVE_ACCEPT4 +int +libc_accept4 (int sockfd, + struct sockaddr *addr, socklen_t * addrlen, int flags); +#else /* HAVE_ACCEPT4 */ +int libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen); +#endif /* HAVE_ACCEPT4 */ + +int libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen); + +int libc_close (int fd); + +int libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen); + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +int libc_dup (int fd); + +int libc_dup2 (int oldfd, int newfd); +#endif + +#ifdef HAVE_EVENTFD +int libc_eventfd (int count, int flags); +#endif +/* NOTE(review): va_list requires <stdarg.h>, which this header does not include itself; confirm it is always included first. */ +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vfcntl (int fd, int cmd, va_list ap); + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vioctl (int fd, int cmd, va_list ap); + +int libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen); + +int libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen); + +int +libc_getsockopt (int sockfd, + int level, int optname, void *optval, socklen_t * optlen); + +int libc_listen (int sockfd, int backlog); +/* NOTE(review): libc_read is declared to return int, while libc_readv below is declared ssize_t. */ +int libc_read (int fd, void *buf, size_t count); + +ssize_t libc_readv (int fd, const struct iovec *iov, int iovcnt); + +int libc_recv (int sockfd, void *buf, size_t len, int flags); + +int +libc_recvfrom (int sockfd, + void *buf, + size_t len, + int flags, struct sockaddr *src_addr, socklen_t * addrlen); + +int libc_recvmsg (int sockfd, struct msghdr *msg, int flags); + +int libc_send (int sockfd, const void *buf, size_t len, int flags); + +int libc_sendmsg (int sockfd, const struct msghdr *msg, int flags); + +int +libc_sendto (int sockfd, + const void *buf, + size_t len, + int flags, const struct sockaddr *dst_addr, socklen_t addrlen); + +int +libc_setsockopt (int sockfd, + int level, int optname, const void *optval, + socklen_t optlen); + +int libc_socket (int domain, int type, int protocol); + +int libc_socketpair (int domain, int type, int protocol, int sv[2]); + 
+ssize_t libc_write (int fd, const void *buf, size_t count); + +ssize_t libc_writev (int fd, const struct iovec *iov, int iovcnt); + +int libc_shutdown (int fd, int how); + +int +libc_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +int +libc_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +int libc_epoll_create (int __size); + +int libc_epoll_create1 (int __flags); + +int libc_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +int libc_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +int libc_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss); + +int libc_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +int libc_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss); +#endif + +void swrap_constructor (void); + +void swrap_destructor (void); + +#endif /* included_vcom_socket_wrapper_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c new file mode 100644 index 00000000000..0f30c60c803 --- /dev/null +++ b/src/vcl/vppcom.c @@ -0,0 +1,3258 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <svm/svm_fifo_segment.h> +#include <vlibmemory/api.h> +#include <vpp/api/vpe_msg_enum.h> +#include <vnet/session/application_interface.h> +#include <vcl/vppcom.h> +#include <vlib/unix/unix.h> +#include <vppinfra/vec_bootstrap.h> + +#define vl_typedefs /* define message structures */ +#include <vpp/api/vpe_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <vpp/api/vpe_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <vpp/api/vpe_all_api_h.h> +#undef vl_printfun + +#if (CLIB_DEBUG > 0) +/* Set VPPCOM_DEBUG 2 for connection debug, 3 for read/write debug output */ +#define VPPCOM_DEBUG 1 +#else +#define VPPCOM_DEBUG 0 +#endif + +/* + * VPPCOM Private definitions and functions. 
+ */ +typedef enum +{ + STATE_APP_START, + STATE_APP_CONN_VPP, + STATE_APP_ENABLED, + STATE_APP_ATTACHED, +} app_state_t; + +typedef enum +{ + STATE_START, + STATE_CONNECT, + STATE_LISTEN, + STATE_ACCEPT, + STATE_DISCONNECT, + STATE_FAILED +} session_state_t; + +typedef struct epoll_event vppcom_epoll_event_t; + +typedef struct +{ + u32 next_sid; + u32 prev_sid; + u32 vep_idx; + vppcom_epoll_event_t ev; +#define VEP_DEFAULT_ET_MASK (EPOLLIN|EPOLLOUT) + u32 et_mask; +} vppcom_epoll_t; + +typedef struct +{ + u8 is_ip4; + ip46_address_t ip46; +} vppcom_ip46_t; + +typedef struct +{ + volatile session_state_t state; + + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + u32 sm_seg_index; + u64 vpp_session_handle; + unix_shared_memory_queue_t *vpp_event_queue; + + /* Socket configuration state */ + /* TBD: convert 'is_*' vars to bits in u8 flags; */ + u8 is_server; + u8 is_listen; + u8 is_cut_thru; + u8 is_nonblocking; + u8 is_vep; + u8 is_vep_session; + u32 wait_cont_idx; + vppcom_epoll_t vep; + u32 vrf; + vppcom_ip46_t lcl_addr; + vppcom_ip46_t peer_addr; + u16 port; + u8 proto; + u64 client_queue_address; + u64 options[16]; +} session_t; + +typedef struct vppcom_cfg_t_ +{ + u64 heapsize; + u64 segment_baseva; + u32 segment_size; + u32 add_segment_size; + u32 preallocated_fifo_pairs; + u32 rx_fifo_size; + u32 tx_fifo_size; + u32 event_queue_size; + u32 listen_queue_size; + f64 app_timeout; + f64 session_timeout; + f64 accept_timeout; +} vppcom_cfg_t; + +typedef struct vppcom_main_t_ +{ + u8 init; + u32 *client_session_index_fifo; + volatile u32 bind_session_index; + int main_cpu; + + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* Session pool */ + clib_spinlock_t sessions_lockp; + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* Select bitmaps */ + clib_bitmap_t *rd_bitmap; + clib_bitmap_t *wr_bitmap; + clib_bitmap_t 
*ex_bitmap; + + /* Our event queue */ + unix_shared_memory_queue_t *app_event_queue; + + /* unique segment name counter */ + u32 unique_segment_index; + + pid_t my_pid; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile app_state_t app_state; + + vppcom_cfg_t cfg; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} vppcom_main_t; + +vppcom_main_t vppcom_main = {.my_client_index = ~0 }; + +static const char * +vppcom_app_state_str (app_state_t state) +{ + char *st; + + switch (state) + { + case STATE_APP_START: + st = "STATE_APP_START"; + break; + + case STATE_APP_CONN_VPP: + st = "STATE_APP_CONN_VPP"; + break; + + case STATE_APP_ENABLED: + st = "STATE_APP_ENABLED"; + break; + + case STATE_APP_ATTACHED: + st = "STATE_APP_ATTACHED"; + break; + + default: + st = "UNKNOWN_APP_STATE"; + break; + } + + return st; +} + +static const char * +vppcom_session_state_str (session_state_t state) +{ + char *st; + + switch (state) + { + case STATE_START: + st = "STATE_START"; + break; + + case STATE_CONNECT: + st = "STATE_CONNECT"; + break; + + case STATE_LISTEN: + st = "STATE_LISTEN"; + break; + + case STATE_ACCEPT: + st = "STATE_ACCEPT"; + break; + + case STATE_DISCONNECT: + st = "STATE_DISCONNECT"; + break; + + case STATE_FAILED: + st = "STATE_FAILED"; + break; + + default: + st = "UNKNOWN_STATE"; + break; + } + + return st; +} + +/* + * VPPCOM Utility Functions + */ +static inline int +vppcom_session_at_index (u32 session_index, session_t * volatile *sess) +{ + vppcom_main_t *vcm = &vppcom_main; + + /* Assumes that caller has acquired spinlock: vcm->sessions_lockp */ + if (PREDICT_FALSE ((session_index == ~0) || + pool_is_free_index (vcm->sessions, session_index))) + { + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + *sess = pool_elt_at_index (vcm->sessions, 
session_index); + return VPPCOM_OK; +} + +static int +vppcom_connect_to_vpp (char *app_name) +{ + api_main_t *am = &api_main; + vppcom_main_t *vcm = &vppcom_main; + + if (VPPCOM_DEBUG > 0) + printf ("\nConnecting to VPP api..."); + if (vl_client_connect_to_vlib ("/vpe-api", app_name, 32) < 0) + { + clib_warning ("[%d] connect to vpp (%s) failed!", + vcm->my_pid, app_name); + return VPPCOM_ECONNREFUSED; + } + + vcm->vl_input_queue = am->shmem_hdr->vl_input_queue; + vcm->my_client_index = am->my_client_index; + if (VPPCOM_DEBUG > 0) + printf (" connected!\n"); + + vcm->app_state = STATE_APP_CONN_VPP; + return VPPCOM_OK; +} + +static u8 * +format_api_error (u8 * s, va_list * args) +{ + vppcom_main_t *vcm = &vppcom_main; + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (vcm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s (%d)", p[0], error); + else + s = format (s, "%d", error); + return s; +} + +static void +vppcom_init_error_string_table (void) +{ + vppcom_main_t *vcm = &vppcom_main; + + vcm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (vcm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (vcm->error_string_by_error_number, 99, "Misc"); +} + +static inline int +vppcom_wait_for_app_state_change (app_state_t app_state) +{ + vppcom_main_t *vcm = &vppcom_main; + f64 timeout = clib_time_now (&vcm->clib_time) + vcm->cfg.app_timeout; + + while (clib_time_now (&vcm->clib_time) < timeout) + { + if (vcm->app_state == app_state) + return VPPCOM_OK; + } + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] timeout waiting for state %s (%d)", vcm->my_pid, + vppcom_app_state_str (app_state), app_state); + return VPPCOM_ETIMEDOUT; +} + +static inline int +vppcom_wait_for_session_state_change (u32 session_index, + session_state_t state, + f64 wait_for_time) +{ + vppcom_main_t *vcm = &vppcom_main; + f64 timeout = clib_time_now (&vcm->clib_time) + wait_for_time; + 
session_t *volatile session; + int rv; + + do + { + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + return rv; + } + if (session->state == state) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + return VPPCOM_OK; + } + clib_spinlock_unlock (&vcm->sessions_lockp); + } + while (clib_time_now (&vcm->clib_time) < timeout); + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] timeout waiting for state %s (%d)", vcm->my_pid, + vppcom_session_state_str (state), state); + return VPPCOM_ETIMEDOUT; +} + +static inline int +vppcom_wait_for_client_session_index (f64 wait_for_time) +{ + vppcom_main_t *vcm = &vppcom_main; + f64 timeout = clib_time_now (&vcm->clib_time) + wait_for_time; + + do + { + if (clib_fifo_elts (vcm->client_session_index_fifo)) + return VPPCOM_OK; + } + while (clib_time_now (&vcm->clib_time) < timeout); + + if (wait_for_time == 0) + return VPPCOM_EAGAIN; + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] timeout waiting for client_session_index", + vcm->my_pid); + return VPPCOM_ETIMEDOUT; +} + +/* + * VPP-API message functions + */ +static void +vppcom_send_session_enable_disable (u8 is_enable) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_session_enable_disable_t *bmp; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_SESSION_ENABLE_DISABLE); + bmp->client_index = vcm->my_client_index; + bmp->context = htonl (0xfeedface); + bmp->is_enable = is_enable; + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp); +} + +static int +vppcom_app_session_enable (void) +{ + vppcom_main_t *vcm = &vppcom_main; + int rv; + + if (vcm->app_state != STATE_APP_ENABLED) + { + vppcom_send_session_enable_disable (1 /* is_enabled == TRUE */ ); + rv = vppcom_wait_for_app_state_change (STATE_APP_ENABLED); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] 
Session enable timed out, rv = %s (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + return rv; + } + } + return VPPCOM_OK; +} + +static void + vl_api_session_enable_disable_reply_t_handler + (vl_api_session_enable_disable_reply_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + + if (mp->retval) + { + clib_warning ("[%d] session_enable_disable failed: %U", vcm->my_pid, + format_api_error, ntohl (mp->retval)); + } + else + vcm->app_state = STATE_APP_ENABLED; +} + +static void +vppcom_app_send_attach (void) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_application_attach_t *bmp; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_ATTACH); + bmp->client_index = vcm->my_client_index; + bmp->context = htonl (0xfeedface); + bmp->options[APP_OPTIONS_FLAGS] = + APP_OPTIONS_FLAGS_ACCEPT_REDIRECT | APP_OPTIONS_FLAGS_ADD_SEGMENT | + APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE | APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE; + bmp->options[SESSION_OPTIONS_SEGMENT_SIZE] = vcm->cfg.segment_size; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = vcm->cfg.add_segment_size; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = vcm->cfg.rx_fifo_size; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = vcm->cfg.tx_fifo_size; + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp); +} + +static int +vppcom_app_attach (void) +{ + vppcom_main_t *vcm = &vppcom_main; + int rv; + + vppcom_app_send_attach (); + rv = vppcom_wait_for_app_state_change (STATE_APP_ATTACHED); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] application attach timed out, rv = %s (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + return rv; + } + return VPPCOM_OK; +} + +static void +vppcom_app_detach (void) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_application_detach_t *bmp; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_APPLICATION_DETACH); + bmp->client_index = 
vcm->my_client_index; + bmp->context = htonl (0xfeedface); + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp); +} + +static void +vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * + mp) +{ + vppcom_main_t *vcm = &vppcom_main; + static svm_fifo_segment_create_args_t _a; + svm_fifo_segment_create_args_t *a = &_a; + int rv; + + memset (a, 0, sizeof (*a)); + if (mp->retval) + { + clib_warning ("[%d] attach failed: %U", vcm->my_pid, + format_api_error, ntohl (mp->retval)); + return; + } + + if (mp->segment_name_length == 0) + { + clib_warning ("[%d] segment_name_length zero", vcm->my_pid); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT (mp->app_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + vec_reset_length (a->new_segment_indices); + if (PREDICT_FALSE (rv)) + { + clib_warning ("[%d] svm_fifo_segment_attach ('%s') failed", vcm->my_pid, + mp->segment_name); + return; + } + + vcm->app_event_queue = + uword_to_pointer (mp->app_event_queue_address, + unix_shared_memory_queue_t *); + + vcm->app_state = STATE_APP_ATTACHED; +} + +static void +vl_api_application_detach_reply_t_handler (vl_api_application_detach_reply_t * + mp) +{ + vppcom_main_t *vcm = &vppcom_main; + + if (mp->retval) + clib_warning ("[%d] detach failed: %U", vcm->my_pid, format_api_error, + ntohl (mp->retval)); + + vcm->app_state = STATE_APP_ENABLED; +} + +static void +vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t * + mp) +{ + vppcom_main_t *vcm = &vppcom_main; + uword *p; + + p = hash_get (vcm->session_index_by_vpp_handles, mp->handle); + if (p) + { + session_t *session = 0; + int rv; + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (p[0], &session); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, p[0]); + } 
+ hash_unset (vcm->session_index_by_vpp_handles, mp->handle); + session->state = STATE_DISCONNECT; + clib_spinlock_unlock (&vcm->sessions_lockp); + } + else + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] couldn't find session key %llx", vcm->my_pid, + mp->handle); + } + + if (mp->retval) + clib_warning ("[%d] disconnect_session failed: %U", vcm->my_pid, + format_api_error, ntohl (mp->retval)); +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + static svm_fifo_segment_create_args_t _a; + svm_fifo_segment_create_args_t *a = &_a; + int rv; + + memset (a, 0, sizeof (*a)); + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + vec_reset_length (a->new_segment_indices); + if (PREDICT_FALSE (rv)) + { + clib_warning ("[%d] svm_fifo_segment_attach ('%s') failed", + vcm->my_pid, mp->segment_name); + return; + } + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] mapped new segment '%s' size %d", vcm->my_pid, + mp->segment_name, mp->segment_size); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + + p = hash_get (vcm->session_index_by_vpp_handles, mp->handle); + if (p) + { + int rval; + clib_spinlock_lock (&vcm->sessions_lockp); + rval = vppcom_session_at_index (p[0], &session); + if (PREDICT_FALSE (rval)) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, p[0]); + } + else + pool_put (vcm->sessions, session); + clib_spinlock_unlock (&vcm->sessions_lockp); + hash_unset (vcm->session_index_by_vpp_handles, mp->handle); + } + else + { + clib_warning ("[%d] couldn't find session key %llx", vcm->my_pid, + mp->handle); + rv = -11; + } + + rmp = vl_msg_api_alloc 
(sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = htonl (rv); + rmp->handle = mp->handle; + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + vl_api_reset_session_reply_t *rmp; + uword *p; + int rv = 0; + + p = hash_get (vcm->session_index_by_vpp_handles, mp->handle); + if (p) + { + int rval; + clib_spinlock_lock (&vcm->sessions_lockp); + rval = vppcom_session_at_index (p[0], &session); + if (PREDICT_FALSE (rval)) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, p[0]); + } + else + pool_put (vcm->sessions, session); + clib_spinlock_unlock (&vcm->sessions_lockp); + hash_unset (vcm->session_index_by_vpp_handles, mp->handle); + } + else + { + clib_warning ("[%d] couldn't find session key %llx", vcm->my_pid, + mp->handle); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_RESET_SESSION_REPLY); + rmp->retval = htonl (rv); + rmp->handle = mp->handle; + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_connect_session_reply_t_handler (vl_api_connect_session_reply_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session; + u32 session_index; + svm_fifo_t *rx_fifo, *tx_fifo; + u8 is_cut_thru = 0; + int rv; + + if (mp->retval) + { + clib_warning ("[%d] connect failed: %U", vcm->my_pid, format_api_error, + ntohl (mp->retval)); + return; + } + + session_index = mp->context; + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] session_index = %d 0x%08x", vcm->my_pid, + session_index, session_index); + + clib_spinlock_lock (&vcm->sessions_lockp); + if (pool_is_free_index (vcm->sessions, session_index)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG 
> 1) + clib_warning ("[%d] invalid session, sid %d is closed!", + vcm->my_pid, session_index); + return; + } + + /* We've been redirected */ + if (mp->segment_name_length > 0) + { + static svm_fifo_segment_create_args_t _a; + svm_fifo_segment_create_args_t *a = &_a; + + is_cut_thru = 1; + memset (a, 0, sizeof (*a)); + a->segment_name = (char *) mp->segment_name; + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] cut-thru segment: %s", vcm->my_pid, + a->segment_name); + rv = svm_fifo_segment_attach (a); + vec_reset_length (a->new_segment_indices); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + clib_warning ("[%d] sm_fifo_segment_attach ('%s') failed", + vcm->my_pid, a->segment_name); + return; + } + } + + /* + * Setup session + */ + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] client sid %d", vcm->my_pid, session_index); + + session = pool_elt_at_index (vcm->sessions, session_index); + session->is_cut_thru = is_cut_thru; + session->vpp_event_queue = uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); + + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); + rx_fifo->client_session_index = session_index; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + session->vpp_session_handle = mp->handle; + session->state = STATE_CONNECT; + + /* Add it to lookup table */ + hash_set (vcm->session_index_by_vpp_handles, mp->handle, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); +} + +static void +vppcom_send_connect_sock (session_t * session, u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_connect_sock_t *cmp; + + /* Assumes caller as acquired the spinlock: vcm->sessions_lockp */ + session->is_server = 0; + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_SOCK); + 
cmp->client_index = vcm->my_client_index; + cmp->context = session_index; + + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] session_index = %d 0x%08x", + vcm->my_pid, session_index, session_index); + + cmp->vrf = session->vrf; + cmp->is_ip4 = session->peer_addr.is_ip4; + clib_memcpy (cmp->ip, &session->peer_addr.ip46, sizeof (cmp->ip)); + cmp->port = session->port; + cmp->proto = session->proto; + clib_memcpy (cmp->options, session->options, sizeof (cmp->options)); + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & cmp); +} + +static int +vppcom_send_disconnect (u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_disconnect_session_t *dmp; + session_t *session = 0; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return rv; + } + + dmp = vl_msg_api_alloc (sizeof (*dmp)); + memset (dmp, 0, sizeof (*dmp)); + dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); + dmp->client_index = vcm->my_client_index; + dmp->handle = session->vpp_session_handle; + clib_spinlock_unlock (&vcm->sessions_lockp); + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & dmp); + return VPPCOM_OK; +} + +static void +vl_api_bind_sock_reply_t_handler (vl_api_bind_sock_reply_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + int rv; + + if (mp->retval) + clib_warning ("[%d] bind failed: %U", vcm->my_pid, format_api_error, + ntohl (mp->retval)); + + ASSERT (vcm->bind_session_index != ~0); + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (vcm->bind_session_index, &session); + if (rv == VPPCOM_OK) + { + session->vpp_session_handle = mp->handle; + hash_set (vcm->session_index_by_vpp_handles, mp->handle, + vcm->bind_session_index); + session->state = mp->retval ? 
STATE_FAILED : STATE_LISTEN; + vcm->bind_session_index = ~0; + } + clib_spinlock_unlock (&vcm->sessions_lockp); +} + +static void +vl_api_unbind_sock_reply_t_handler (vl_api_unbind_sock_reply_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (vcm->bind_session_index, &session); + if (rv == VPPCOM_OK) + { + if ((VPPCOM_DEBUG > 1) && (mp->retval)) + clib_warning ("[%d] unbind failed: %U", vcm->my_pid, format_api_error, + ntohl (mp->retval)); + + vcm->bind_session_index = ~0; + session->state = STATE_START; + } + clib_spinlock_unlock (&vcm->sessions_lockp); +} + +u8 * +format_ip4_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]); +} + +u8 * +format_ip6_address (u8 * s, va_list * args) +{ + ip6_address_t *a = va_arg (*args, ip6_address_t *); + u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon; + + i_max_n_zero = ARRAY_LEN (a->as_u16); + max_n_zeros = 0; + i_first_zero = i_max_n_zero; + n_zeros = 0; + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + u32 is_zero = a->as_u16[i] == 0; + if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16)) + { + i_first_zero = i; + n_zeros = 0; + } + n_zeros += is_zero; + if ((!is_zero && n_zeros > max_n_zeros) + || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros)) + { + i_max_n_zero = i_first_zero; + max_n_zeros = n_zeros; + i_first_zero = ARRAY_LEN (a->as_u16); + n_zeros = 0; + } + } + + last_double_colon = 0; + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + if (i == i_max_n_zero && max_n_zeros > 1) + { + s = format (s, "::"); + i += max_n_zeros - 1; + last_double_colon = 1; + } + else + { + s = format (s, "%s%x", + (last_double_colon || i == 0) ? "" : ":", + clib_net_to_host_u16 (a->as_u16[i])); + last_double_colon = 0; + } + } + + return s; +} + +/* Format an IP46 address. 
*/ +u8 * +format_ip46_address (u8 * s, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + ip46_type_t type = va_arg (*args, ip46_type_t); + int is_ip4 = 1; + + switch (type) + { + case IP46_TYPE_ANY: + is_ip4 = ip46_address_is_ip4 (ip46); + break; + case IP46_TYPE_IP4: + is_ip4 = 1; + break; + case IP46_TYPE_IP6: + is_ip4 = 0; + break; + } + + return is_ip4 ? + format (s, "%U", format_ip4_address, &ip46->ip4) : + format (s, "%U", format_ip6_address, &ip46->ip6); +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + u32 session_index; + int rv = 0; + + if (!clib_fifo_free_elts (vcm->client_session_index_fifo)) + { + clib_warning ("[%d] client session queue is full!", vcm->my_pid); + rv = VNET_API_ERROR_QUEUE_FULL; + goto send_reply; + } + + if (VPPCOM_DEBUG > 1) + { + u8 *ip_str = format (0, "%U", format_ip46_address, &mp->ip, mp->is_ip4); + clib_warning ("[%d] accepted session from: %s:%d", vcm->my_pid, ip_str, + clib_net_to_host_u16 (mp->port)); + vec_free (ip_str); + } + + clib_spinlock_lock (&vcm->sessions_lockp); + /* Allocate local session and set it up */ + pool_get (vcm->sessions, session); + memset (session, 0, sizeof (*session)); + session_index = session - vcm->sessions; + + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); + rx_fifo->client_session_index = session_index; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + session->vpp_event_queue = uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); + session->state = STATE_ACCEPT; + session->is_cut_thru = 0; + session->is_server = 1; + session->port = mp->port; + session->peer_addr.is_ip4 = mp->is_ip4; + clib_memcpy 
(&session->peer_addr.ip46, mp->ip, + sizeof (session->peer_addr.ip46)); + + /* Add it to lookup table */ + hash_set (vcm->session_index_by_vpp_handles, mp->handle, session_index); + + clib_fifo_add1 (vcm->client_session_index_fifo, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + + /* + * Send accept reply to vpp + */ +send_reply: + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->retval = htonl (rv); + rmp->handle = mp->handle; + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & rmp); +} + +/* + * Acting as server for redirected connect requests + */ +static void +vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp) +{ + static svm_fifo_segment_create_args_t _a; + svm_fifo_segment_create_args_t *a = &_a; + vppcom_main_t *vcm = &vppcom_main; + u32 session_index; + svm_fifo_segment_private_t *seg; + unix_shared_memory_queue_t *client_q; + vl_api_connect_session_reply_t *rmp; + session_t *session = 0; + int rv = 0; + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + unix_shared_memory_queue_t *event_q = 0; + + clib_spinlock_lock (&vcm->sessions_lockp); + if (!clib_fifo_free_elts (vcm->client_session_index_fifo)) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] client session queue is full!", vcm->my_pid); + rv = VNET_API_ERROR_QUEUE_FULL; + clib_spinlock_unlock (&vcm->sessions_lockp); + goto send_reply; + } + + /* Create the segment */ + memset (a, 0, sizeof (*a)); + a->segment_name = (char *) format ((u8 *) a->segment_name, "%d:segment%d%c", + vcm->my_pid, vcm->unique_segment_index++, + 0); + a->segment_size = vcm->cfg.segment_size; + a->preallocated_fifo_pairs = vcm->cfg.preallocated_fifo_pairs; + a->rx_fifo_size = vcm->cfg.rx_fifo_size; + a->tx_fifo_size = vcm->cfg.tx_fifo_size; + + rv = svm_fifo_segment_create (a); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] svm_fifo_segment_create ('%s') failed", + vcm->my_pid, 
a->segment_name); + vec_reset_length (a->new_segment_indices); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + goto send_reply; + } + + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] created segment '%s'", vcm->my_pid, a->segment_name); + + pool_get (vcm->sessions, session); + memset (session, 0, sizeof (*session)); + session_index = session - vcm->sessions; + + session->sm_seg_index = a->new_segment_indices[0]; + vec_reset_length (a->new_segment_indices); + + seg = svm_fifo_segment_get_segment (session->sm_seg_index); + rx_fifo = session->server_rx_fifo = + svm_fifo_segment_alloc_fifo (seg, vcm->cfg.rx_fifo_size, + FIFO_SEGMENT_RX_FREELIST); + if (PREDICT_FALSE (!session->server_rx_fifo)) + { + svm_fifo_segment_delete (seg); + clib_warning ("[%d] rx fifo alloc failed, size %ld (0x%lx)", + vcm->my_pid, vcm->cfg.rx_fifo_size, + vcm->cfg.rx_fifo_size); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + clib_spinlock_unlock (&vcm->sessions_lockp); + goto send_reply; + } + + tx_fifo = session->server_tx_fifo = + svm_fifo_segment_alloc_fifo (seg, vcm->cfg.tx_fifo_size, + FIFO_SEGMENT_TX_FREELIST); + if (PREDICT_FALSE (!session->server_tx_fifo)) + { + svm_fifo_segment_delete (seg); + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] tx fifo alloc failed, size %ld (0x%lx)", + vcm->my_pid, vcm->cfg.tx_fifo_size, + vcm->cfg.tx_fifo_size); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + clib_spinlock_unlock (&vcm->sessions_lockp); + goto send_reply; + } + + session->server_rx_fifo->master_session_index = session_index; + session->server_tx_fifo->master_session_index = session_index; + session->client_queue_address = mp->client_queue_address; + session->is_cut_thru = 1; + session->is_server = 1; + session->port = mp->port; + session->peer_addr.is_ip4 = mp->is_ip4; + clib_memcpy (&session->peer_addr.ip46, mp->ip, + sizeof (session->peer_addr.ip46)); + { + void *oldheap; + ssvm_shared_header_t *sh = seg->ssvm.sh; + + ssvm_lock_non_recursive (sh, 1); + oldheap = ssvm_push_heap (sh); + 
event_q = session->vpp_event_queue = + unix_shared_memory_queue_init (vcm->cfg.event_queue_size, + sizeof (session_fifo_event_t), + vcm->my_pid, 0 /* signal not sent */ ); + ssvm_pop_heap (oldheap); + ssvm_unlock_non_recursive (sh); + } + + session->state = STATE_ACCEPT; + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] Connected cut-thru to client: sid %d", + vcm->my_pid, session_index); + clib_fifo_add1 (vcm->client_session_index_fifo, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + +send_reply: + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_CONNECT_SESSION_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + rmp->segment_name_length = vec_len (a->segment_name); + clib_memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name)); + vec_reset_length (a->segment_name); + + if (event_q) + { + rmp->vpp_event_queue_address = pointer_to_uword (event_q); + rmp->server_rx_fifo = pointer_to_uword (rx_fifo); + rmp->server_tx_fifo = pointer_to_uword (tx_fifo); + } + client_q = + uword_to_pointer (mp->client_queue_address, unix_shared_memory_queue_t *); + + ASSERT (client_q); + vl_msg_api_send_shmem (client_q, (u8 *) & rmp); +} + +static void +vppcom_send_bind_sock (session_t * session) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_bind_sock_t *bmp; + + /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ + session->is_server = 1; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_SOCK); + bmp->client_index = vcm->my_client_index; + bmp->context = htonl (0xfeedface); + bmp->vrf = session->vrf; + bmp->is_ip4 = session->lcl_addr.is_ip4; + clib_memcpy (bmp->ip, &session->lcl_addr.ip46, sizeof (bmp->ip)); + bmp->port = session->port; + bmp->proto = session->proto; + clib_memcpy (bmp->options, session->options, sizeof (bmp->options)); + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & bmp); +} + 
+static void +vppcom_send_unbind_sock (u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + vl_api_unbind_sock_t *ump; + session_t *session = 0; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return; + } + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_SOCK); + ump->client_index = vcm->my_client_index; + ump->handle = session->vpp_session_handle; + clib_spinlock_unlock (&vcm->sessions_lockp); + vl_msg_api_send_shmem (vcm->vl_input_queue, (u8 *) & ump); +} + +static int +vppcom_session_unbind_cut_thru (session_t * session) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + svm_fifo_segment_private_t *seg; + int rv = VPPCOM_OK; + + seg = vec_elt_at_index (sm->segments, session->sm_seg_index); + svm_fifo_segment_free_fifo (seg, session->server_rx_fifo, + FIFO_SEGMENT_RX_FREELIST); + svm_fifo_segment_free_fifo (seg, session->server_tx_fifo, + FIFO_SEGMENT_TX_FREELIST); + svm_fifo_segment_delete (seg); + + return rv; +} + +static int +vppcom_session_unbind (u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + if (PREDICT_FALSE (pool_is_free_index (vcm->sessions, session_index))) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + clib_spinlock_unlock (&vcm->sessions_lockp); + + vcm->bind_session_index = session_index; + vppcom_send_unbind_sock (session_index); + rv = vppcom_wait_for_session_state_change (session_index, STATE_START, + vcm->cfg.session_timeout); + if (PREDICT_FALSE (rv)) + { + 
vcm->bind_session_index = ~0; + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] server unbind timed out, rv = %s (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + return rv; + } + return VPPCOM_OK; +} + +static int +vppcom_session_disconnect (u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + int rv; + + rv = vppcom_send_disconnect (session_index); + if (PREDICT_FALSE (rv)) + return rv; + + rv = vppcom_wait_for_session_state_change (session_index, STATE_DISCONNECT, + vcm->cfg.session_timeout); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] client disconnect timed out, rv = %s (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + return rv; + } + return VPPCOM_OK; +} + +#define foreach_sock_msg \ +_(SESSION_ENABLE_DISABLE_REPLY, session_enable_disable_reply) \ +_(BIND_SOCK_REPLY, bind_sock_reply) \ +_(UNBIND_SOCK_REPLY, unbind_sock_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(CONNECT_SOCK, connect_sock) \ +_(CONNECT_SESSION_REPLY, connect_session_reply) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \ +_(RESET_SESSION, reset_session) \ +_(APPLICATION_ATTACH_REPLY, application_attach_reply) \ +_(APPLICATION_DETACH_REPLY, application_detach_reply) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +static void +vppcom_api_hookup (void) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_sock_msg; +#undef _ +} + +static void +vppcom_cfg_init (vppcom_cfg_t * vcl_cfg) +{ + ASSERT (vcl_cfg); + + vcl_cfg->heapsize = (256ULL << 20); + vcl_cfg->segment_baseva = 0x200000000ULL; + vcl_cfg->segment_size = (256 << 20); + vcl_cfg->add_segment_size = (128 << 20); + vcl_cfg->preallocated_fifo_pairs = 8; + vcl_cfg->rx_fifo_size = (1 << 20); + vcl_cfg->tx_fifo_size = (1 << 20); + vcl_cfg->event_queue_size = 2048; + 
vcl_cfg->listen_queue_size = CLIB_CACHE_LINE_BYTES / sizeof (u32); + vcl_cfg->app_timeout = 10 * 60.0; + vcl_cfg->session_timeout = 10 * 60.0; + vcl_cfg->accept_timeout = 60.0; +} + +static void +vppcom_cfg_heapsize (char *conf_fname) +{ + vppcom_main_t *vcm = &vppcom_main; + vppcom_cfg_t *vcl_cfg = &vcm->cfg; + FILE *fp; + char inbuf[4096]; + int argc = 1; + char **argv = NULL; + char *arg = NULL; + char *p; + int i; + u8 *sizep; + u32 size; + + fp = fopen (conf_fname, "r"); + if (fp == NULL) + { + if (VPPCOM_DEBUG > 0) + fprintf (stderr, "open configuration file '%s' failed\n", conf_fname); + goto defaulted; + } + argv = calloc (1, sizeof (char *)); + if (argv == NULL) + goto defaulted; + + while (1) + { + if (fgets (inbuf, 4096, fp) == 0) + break; + p = strtok (inbuf, " \t\n"); + while (p != NULL) + { + if (*p == '#') + break; + argc++; + char **tmp = realloc (argv, argc * sizeof (char *)); + if (tmp == NULL) + goto defaulted; + argv = tmp; + arg = strndup (p, 1024); + if (arg == NULL) + goto defaulted; + argv[argc - 1] = arg; + p = strtok (NULL, " \t\n"); + } + } + + fclose (fp); + fp = NULL; + + char **tmp = realloc (argv, (argc + 1) * sizeof (char *)); + if (tmp == NULL) + goto defaulted; + argv = tmp; + argv[argc] = NULL; + + /* + * Look for and parse the "heapsize" config parameter. + * Manual since none of the clib infra has been bootstrapped yet. 
+ * + * Format: heapsize <nn>[mM][gG] + */ + + for (i = 1; i < (argc - 1); i++) + { + if (!strncmp (argv[i], "heapsize", 8)) + { + sizep = (u8 *) argv[i + 1]; + size = 0; + while (*sizep >= '0' && *sizep <= '9') + { + size *= 10; + size += *sizep++ - '0'; + } + if (size == 0) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] parse error '%s %s', " + "using default heapsize %lld (0x%llx)", + vcm->my_pid, argv[i], argv[i + 1], + vcl_cfg->heapsize, vcl_cfg->heapsize); + goto defaulted; + } + + if (*sizep == 'g' || *sizep == 'G') + vcl_cfg->heapsize = size << 30; + else if (*sizep == 'm' || *sizep == 'M') + vcl_cfg->heapsize = size << 20; + else + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] parse error '%s %s', " + "using default heapsize %lld (0x%llx)", + vcm->my_pid, argv[i], argv[i + 1], + vcl_cfg->heapsize, vcl_cfg->heapsize); + goto defaulted; + } + } + } + +defaulted: + if (fp != NULL) + fclose (fp); + if (argv != NULL) + free (argv); + if (!clib_mem_init (0, vcl_cfg->heapsize)) + clib_warning ("[%d] vppcom heap allocation failure!", vcm->my_pid); + else if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] allocated vppcom heapsize %lld (0x%llx)", + vcm->my_pid, vcl_cfg->heapsize, vcl_cfg->heapsize); +} + +static void +vppcom_cfg_read (char *conf_fname) +{ + vppcom_main_t *vcm = &vppcom_main; + vppcom_cfg_t *vcl_cfg = &vcm->cfg; + int fd; + unformat_input_t _input, *input = &_input; + unformat_input_t _line_input, *line_input = &_line_input; + u8 vc_cfg_input = 0; + u8 *chroot_path; + struct stat s; + u32 uid, gid; + + fd = open (conf_fname, O_RDONLY); + if (fd < 0) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] open configuration file '%s' failed!", + vcm->my_pid, conf_fname); + goto file_done; + } + + if (fstat (fd, &s) < 0) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] failed to stat `%s'", vcm->my_pid, conf_fname); + goto file_done; + } + + if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode))) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] not a regular 
file `%s'", vcm->my_pid, + conf_fname); + goto file_done; + } + + unformat_init_clib_file (input, fd); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + (void) unformat_user (input, unformat_line_input, line_input); + unformat_skip_white_space (line_input); + + if (unformat (line_input, "vppcom {")) + { + vc_cfg_input = 1; + continue; + } + + if (vc_cfg_input) + { + if (unformat (line_input, "heapsize %s", &chroot_path)) + { + vec_terminate_c_string (chroot_path); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured heapsize %s, " + "actual heapsize %lld (0x%llx)", + vcm->my_pid, chroot_path, vcl_cfg->heapsize, + vcl_cfg->heapsize); + vec_free (chroot_path); + } + else if (unformat (line_input, "api-prefix %s", &chroot_path)) + { + vec_terminate_c_string (chroot_path); + vl_set_memory_root_path ((char *) chroot_path); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured api-prefix %s", + vcm->my_pid, chroot_path); + chroot_path = 0; /* Don't vec_free() it! */ + } + else if (unformat (line_input, "uid %d", &uid)) + { + vl_set_memory_uid (uid); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured uid %d", vcm->my_pid, uid); + } + else if (unformat (line_input, "gid %d", &gid)) + { + vl_set_memory_gid (gid); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured gid %d", vcm->my_pid, gid); + } + else if (unformat (line_input, "segment-baseva 0x%llx", + &vcl_cfg->segment_baseva)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured segment_baseva 0x%llx", + vcm->my_pid, vcl_cfg->segment_baseva); + } + else if (unformat (line_input, "segment-size 0x%lx", + &vcl_cfg->segment_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured segment_size 0x%lx (%ld)", + vcm->my_pid, vcl_cfg->segment_size, + vcl_cfg->segment_size); + } + else if (unformat (line_input, "segment-size %ld", + &vcl_cfg->segment_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured segment_size %ld (0x%lx)", + vcm->my_pid, 
vcl_cfg->segment_size, + vcl_cfg->segment_size); + } + else if (unformat (line_input, "add-segment-size 0x%lx", + &vcl_cfg->add_segment_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning + ("[%d] configured add_segment_size 0x%lx (%ld)", + vcm->my_pid, vcl_cfg->add_segment_size, + vcl_cfg->add_segment_size); + } + else if (unformat (line_input, "add-segment-size %ld", + &vcl_cfg->add_segment_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning + ("[%d] configured add_segment_size %ld (0x%lx)", + vcm->my_pid, vcl_cfg->add_segment_size, + vcl_cfg->add_segment_size); + } + else if (unformat (line_input, "preallocated-fifo-pairs %d", + &vcl_cfg->preallocated_fifo_pairs)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured preallocated_fifo_pairs " + "%d (0x%x)", vcm->my_pid, + vcl_cfg->preallocated_fifo_pairs, + vcl_cfg->preallocated_fifo_pairs); + } + else if (unformat (line_input, "rx-fifo-size 0x%lx", + &vcl_cfg->rx_fifo_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured rx_fifo_size 0x%lx (%ld)", + vcm->my_pid, vcl_cfg->rx_fifo_size, + vcl_cfg->rx_fifo_size); + } + else if (unformat (line_input, "rx-fifo-size %ld", + &vcl_cfg->rx_fifo_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured rx_fifo_size %ld (0x%lx)", + vcm->my_pid, vcl_cfg->rx_fifo_size, + vcl_cfg->rx_fifo_size); + } + else if (unformat (line_input, "tx-fifo-size 0x%lx", + &vcl_cfg->tx_fifo_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured tx_fifo_size 0x%lx (%ld)", + vcm->my_pid, vcl_cfg->tx_fifo_size, + vcl_cfg->tx_fifo_size); + } + else if (unformat (line_input, "tx-fifo-size %ld", + &vcl_cfg->tx_fifo_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured tx_fifo_size %ld (0x%lx)", + vcm->my_pid, vcl_cfg->tx_fifo_size, + vcl_cfg->tx_fifo_size); + } + else if (unformat (line_input, "event-queue-size 0x%lx", + &vcl_cfg->event_queue_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured event_queue_size 0x%lx (%ld)", + 
vcm->my_pid, vcl_cfg->event_queue_size, + vcl_cfg->event_queue_size); + } + else if (unformat (line_input, "event-queue-size %ld", + &vcl_cfg->event_queue_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured event_queue_size %ld (0x%lx)", + vcm->my_pid, vcl_cfg->event_queue_size, + vcl_cfg->event_queue_size); + } + else if (unformat (line_input, "listen-queue-size 0x%lx", + &vcl_cfg->listen_queue_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured listen_queue_size 0x%lx (%ld)", + vcm->my_pid, vcl_cfg->listen_queue_size, + vcl_cfg->listen_queue_size); + } + else if (unformat (line_input, "listen-queue-size %ld", + &vcl_cfg->listen_queue_size)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured listen_queue_size %ld (0x%lx)", + vcm->my_pid, vcl_cfg->listen_queue_size, + vcl_cfg->listen_queue_size); + } + else if (unformat (line_input, "app-timeout %f", + &vcl_cfg->app_timeout)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured app_timeout %f", + vcm->my_pid, vcl_cfg->app_timeout); + } + else if (unformat (line_input, "session-timeout %f", + &vcl_cfg->session_timeout)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured session_timeout %f", + vcm->my_pid, vcl_cfg->session_timeout); + } + else if (unformat (line_input, "accept-timeout %f", + &vcl_cfg->accept_timeout)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] configured accept_timeout %f", + vcm->my_pid, vcl_cfg->accept_timeout); + } + else if (unformat (line_input, "}")) + { + vc_cfg_input = 0; + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] completed parsing vppcom config!", + vcm->my_pid); + goto input_done; + } + else + { + if (line_input->buffer[line_input->index] != '#') + { + clib_warning ("[%d] Unknown vppcom config option: '%s'", + vcm->my_pid, (char *) + &line_input->buffer[line_input->index]); + } + } + } + } + +input_done: + unformat_free (input); + +file_done: + if (fd >= 0) + close (fd); +} + +/* + * VPPCOM Public API functions + */ +int 
+vppcom_app_create (char *app_name) +{ + vppcom_main_t *vcm = &vppcom_main; + vppcom_cfg_t *vcl_cfg = &vcm->cfg; + u8 *heap; + mheap_t *h; + int rv; + + if (!vcm->init) + { + char *conf_fname; + + vcm->init = 1; + vcm->my_pid = getpid (); + clib_fifo_validate (vcm->client_session_index_fifo, + vcm->cfg.listen_queue_size); + vppcom_cfg_init (vcl_cfg); + conf_fname = getenv (VPPCOM_CONF_ENV); + if (!conf_fname) + { + conf_fname = VPPCOM_CONF_DEFAULT; + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] getenv '%s' failed!", vcm->my_pid, + VPPCOM_CONF_ENV); + } + vppcom_cfg_heapsize (conf_fname); + vppcom_cfg_read (conf_fname); + vcm->bind_session_index = ~0; + vcm->main_cpu = os_get_thread_index (); + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vcm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + clib_time_init (&vcm->clib_time); + vppcom_init_error_string_table (); + svm_fifo_segment_init (vcl_cfg->segment_baseva, + 20 /* timeout in secs */ ); + clib_spinlock_init (&vcm->sessions_lockp); + vppcom_api_hookup (); + } + + if (vcm->my_client_index == ~0) + { + vcm->app_state = STATE_APP_START; + rv = vppcom_connect_to_vpp (app_name); + if (rv) + { + clib_warning ("[%d] couldn't connect to VPP.", vcm->my_pid); + return rv; + } + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sending session enable", vcm->my_pid); + + rv = vppcom_app_session_enable (); + if (rv) + { + clib_warning ("[%d] vppcom_app_session_enable() failed!", + vcm->my_pid); + return rv; + } + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sending app attach", vcm->my_pid); + + rv = vppcom_app_attach (); + if (rv) + { + clib_warning ("[%d] vppcom_app_attach() failed!", vcm->my_pid); + return rv; + } + } + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] app_name '%s', my_client_index %d (0x%x)", + vcm->my_pid, app_name, vcm->my_client_index, + vcm->my_client_index); + + return VPPCOM_OK; +} + 
+void +vppcom_app_destroy (void) +{ + vppcom_main_t *vcm = &vppcom_main; + int rv; + + if (vcm->my_client_index == ~0) + return; + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] detaching from VPP, my_client_index %d (0x%x)", + vcm->my_pid, vcm->my_client_index, vcm->my_client_index); + + vppcom_app_detach (); + rv = vppcom_wait_for_app_state_change (STATE_APP_ENABLED); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] application detach timed out, rv = %s (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + } + vl_client_disconnect_from_vlib (); + vcm->my_client_index = ~0; + vcm->app_state = STATE_APP_START; +} + +int +vppcom_session_create (u32 vrf, u8 proto, u8 is_nonblocking) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session; + u32 session_index; + + clib_spinlock_lock (&vcm->sessions_lockp); + pool_get (vcm->sessions, session); + memset (session, 0, sizeof (*session)); + session_index = session - vcm->sessions; + + session->vrf = vrf; + session->proto = proto; + session->state = STATE_START; + session->is_nonblocking = is_nonblocking ? 
1 : 0; + clib_spinlock_unlock (&vcm->sessions_lockp); + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid %d", vcm->my_pid, session_index); + + return (int) session_index; +} + +int +vppcom_session_close (uint32_t session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + goto done; + } + clib_spinlock_unlock (&vcm->sessions_lockp); + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid %d", vcm->my_pid, session_index); + + if (session->is_vep) + { + u32 next_sid; + for (next_sid = session->vep.next_sid; next_sid != ~0; + next_sid = session->vep.next_sid) + { + rv = vppcom_epoll_ctl (session_index, EPOLL_CTL_DEL, next_sid, 0); + if ((VPPCOM_DEBUG > 0) && (rv < 0)) + clib_warning ("[%d] EPOLL_CTL_DEL vep_idx %u, sid %u failed, " + "rv = %s (%d)", session_index, next_sid, + vcm->my_pid, session_index, + vppcom_retval_str (rv), rv); + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning + ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + goto done; + } + clib_spinlock_unlock (&vcm->sessions_lockp); + } + } + else + { + if (session->is_vep_session) + { + u32 vep_idx = session->vep.vep_idx; + rv = vppcom_epoll_ctl (vep_idx, EPOLL_CTL_DEL, session_index, 0); + if ((VPPCOM_DEBUG > 0) && (rv < 0)) + clib_warning ("[%d] EPOLL_CTL_DEL vep_idx %u, sid %u failed, " + "rv = %s (%d)", vep_idx, session_index, + vcm->my_pid, session_index, + vppcom_retval_str (rv), rv); + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index 
(session_index, &session); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning + ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + goto done; + } + clib_spinlock_unlock (&vcm->sessions_lockp); + } + + if (session->is_cut_thru && session->is_server && + (session->state == STATE_ACCEPT)) + { + rv = vppcom_session_unbind_cut_thru (session); + if ((VPPCOM_DEBUG > 0) && (rv < 0)) + clib_warning ("[%d] unbind cut-thru (session %d) failed, " + "rv = %s (%d)", + vcm->my_pid, session_index, + vppcom_retval_str (rv), rv); + } + else if (session->is_server && session->is_listen) + { + rv = vppcom_session_unbind (session_index); + if ((VPPCOM_DEBUG > 0) && (rv < 0)) + clib_warning ("[%d] unbind (session %d) failed, rv = %s (%d)", + vcm->my_pid, session_index, + vppcom_retval_str (rv), rv); + } + else if (session->state == STATE_CONNECT) + { + rv = vppcom_session_disconnect (session_index); + if ((VPPCOM_DEBUG > 0) && (rv < 0)) + clib_warning ("[%d] disconnect (session %d) failed, rv = %s (%d)", + vcm->my_pid, session_index, + vppcom_retval_str (rv), rv); + } + } + pool_put_index (vcm->sessions, session_index); +done: + return rv; +} + +int +vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + int rv; + + if (!ep || !ep->ip) + return VPPCOM_EINVAL; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return rv; + } + + if (session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + + if 
(VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid %d", vcm->my_pid, session_index); + + session->vrf = ep->vrf; + session->lcl_addr.is_ip4 = ep->is_ip4; + session->lcl_addr.ip46 = to_ip46 (!ep->is_ip4, ep->ip); + session->port = ep->port; + + clib_spinlock_unlock (&vcm->sessions_lockp); + return VPPCOM_OK; +} + +int +vppcom_session_listen (uint32_t listen_session_index, uint32_t q_len) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *listen_session = 0; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (listen_session_index, &listen_session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, listen_session_index); + return rv; + } + + if (listen_session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, listen_session_index); + return VPPCOM_EBADFD; + } + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid %d", vcm->my_pid, listen_session_index); + + ASSERT (vcm->bind_session_index == ~0); + vcm->bind_session_index = listen_session_index; + vppcom_send_bind_sock (listen_session); + clib_spinlock_unlock (&vcm->sessions_lockp); + rv = + vppcom_wait_for_session_state_change (listen_session_index, STATE_LISTEN, + vcm->cfg.session_timeout); + if (PREDICT_FALSE (rv)) + { + vcm->bind_session_index = ~0; + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] server listen timed out, rv = %d (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + return rv; + } + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (listen_session_index, &listen_session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, listen_session_index); + return rv; + } + 
listen_session->is_listen = 1; + clib_fifo_validate (vcm->client_session_index_fifo, q_len); + clib_spinlock_unlock (&vcm->sessions_lockp); + + return VPPCOM_OK; +} + +int +vppcom_session_accept (uint32_t listen_session_index, vppcom_endpt_t * ep, + double wait_for_time) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *listen_session = 0; + session_t *client_session = 0; + u32 client_session_index; + int rv; + f64 wait_for; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (listen_session_index, &listen_session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, listen_session_index); + return rv; + } + + if (listen_session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, listen_session_index); + return VPPCOM_EBADFD; + } + + if (listen_session->state != STATE_LISTEN) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] session not in listen state, state = %s", + vcm->my_pid, + vppcom_session_state_str (listen_session->state)); + return VPPCOM_EBADFD; + } + wait_for = listen_session->is_nonblocking ? 0 : + (wait_for_time < 0) ? 
vcm->cfg.accept_timeout : wait_for_time; + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid %d: %s (%d)", vcm->my_pid, + listen_session_index, + vppcom_session_state_str (listen_session->state), + listen_session->state); + clib_spinlock_unlock (&vcm->sessions_lockp); + + while (1) + { + rv = vppcom_wait_for_client_session_index (wait_for); + if (rv) + { + if ((VPPCOM_DEBUG > 0)) + clib_warning ("[%d] sid %d, accept timed out, rv = %s (%d)", + vcm->my_pid, listen_session_index, + vppcom_retval_str (rv), rv); + if ((wait_for == 0) || (wait_for_time > 0)) + return rv; + } + else + break; + } + + clib_spinlock_lock (&vcm->sessions_lockp); + clib_fifo_sub1 (vcm->client_session_index_fifo, client_session_index); + rv = vppcom_session_at_index (client_session_index, &client_session); + ASSERT (rv == VPPCOM_OK); + ASSERT (client_session->peer_addr.is_ip4 == + listen_session->lcl_addr.is_ip4); + + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] Got a request: client sid %d", vcm->my_pid, + client_session_index); + + ep->vrf = client_session->vrf; + ep->is_cut_thru = client_session->is_cut_thru; + ep->is_ip4 = client_session->peer_addr.is_ip4; + ep->port = client_session->port; + if (client_session->peer_addr.is_ip4) + clib_memcpy (ep->ip, &client_session->peer_addr.ip46.ip4, + sizeof (ip4_address_t)); + else + clib_memcpy (ep->ip, &client_session->peer_addr.ip46.ip6, + sizeof (ip6_address_t)); + clib_spinlock_unlock (&vcm->sessions_lockp); + return (int) client_session_index; +} + +int +vppcom_session_connect (uint32_t session_index, vppcom_endpt_t * server_ep) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + int rv; + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return rv; + } + + if (session->is_vep) + 
{ + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + + if (session->state == STATE_CONNECT) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] session, sid (%u) already connected!", + vcm->my_pid, session_index); + return VPPCOM_OK; + } + + session->vrf = server_ep->vrf; + session->peer_addr.is_ip4 = server_ep->is_ip4; + session->peer_addr.ip46 = to_ip46 (!server_ep->is_ip4, server_ep->ip); + session->port = server_ep->port; + + if (VPPCOM_DEBUG > 0) + { + u8 *ip_str = format (0, "%U", format_ip46_address, + &session->peer_addr.ip46, + session->peer_addr.is_ip4); + clib_warning ("[%d] connect sid %d to %s server port %d proto %s", + vcm->my_pid, session_index, ip_str, + clib_net_to_host_u16 (session->port), + session->proto ? "UDP" : "TCP"); + vec_free (ip_str); + } + + vppcom_send_connect_sock (session, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + rv = vppcom_wait_for_session_state_change (session_index, STATE_CONNECT, + vcm->cfg.session_timeout); + if (PREDICT_FALSE (rv)) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] connect timed out, rv = %s (%d)", + vcm->my_pid, vppcom_retval_str (rv), rv); + return rv; + } + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid %d connected!", vcm->my_pid, session_index); + + return VPPCOM_OK; +} + +int +vppcom_session_read (uint32_t session_index, void *buf, int n) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + svm_fifo_t *rx_fifo; + int n_read = 0; + int rv; + char *fifo_str; + u32 poll_et; + + ASSERT (buf); + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, 
session_index); + return rv; + } + + if (session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + + if (session->state == STATE_DISCONNECT) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid (%u) has been closed by remote peer!", + vcm->my_pid, session_index); + return VPPCOM_ECONNRESET; + } + + rx_fifo = ((!session->is_cut_thru || session->is_server) ? + session->server_rx_fifo : session->server_tx_fifo); + fifo_str = ((!session->is_cut_thru || session->is_server) ? + "server_rx_fifo" : "server_tx_fifo"); + poll_et = EPOLLET & session->vep.ev.events; + clib_spinlock_unlock (&vcm->sessions_lockp); + + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, n, buf); + } + while (!session->is_nonblocking && (n_read <= 0)); + + if (poll_et && (n_read <= 0)) + { + clib_spinlock_lock (&vcm->sessions_lockp); + session->vep.et_mask |= EPOLLIN; + clib_spinlock_unlock (&vcm->sessions_lockp); + } + + if ((VPPCOM_DEBUG > 2) && (n_read > 0)) + clib_warning ("[%d] sid %d, read %d bytes from %s (%p)", vcm->my_pid, + session_index, n_read, fifo_str, rx_fifo); + + return (n_read <= 0) ? 
VPPCOM_EAGAIN : n_read; +} + +static inline int +vppcom_session_read_ready (session_t * session, u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + svm_fifo_t *rx_fifo; + int ready = 0; + + /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ + if (session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + + if (session->state == STATE_DISCONNECT) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid (%u) has been closed by remote peer!", + vcm->my_pid, session_index); + return VPPCOM_ECONNRESET; + } + + if (session->is_listen) + ready = clib_fifo_elts (vcm->client_session_index_fifo); + else + { + rx_fifo = ((!session->is_cut_thru || session->is_server) ? + session->server_rx_fifo : session->server_tx_fifo); + + ready = svm_fifo_max_dequeue (rx_fifo); + } + + if (VPPCOM_DEBUG > 3) + clib_warning ("[%d] sid %d, peek %s (%p), ready = %d", vcm->my_pid, + session_index, + session->is_server ? 
"server_rx_fifo" : "server_tx_fifo", + rx_fifo, ready); + if ((session->vep.ev.events & EPOLLET) && (ready == 0)) + session->vep.et_mask |= EPOLLIN; + + return ready; +} + +int +vppcom_session_write (uint32_t session_index, void *buf, int n) +{ + vppcom_main_t *vcm = &vppcom_main; + session_t *session = 0; + svm_fifo_t *tx_fifo; + unix_shared_memory_queue_t *q; + session_fifo_event_t evt; + int rv, n_write; + char *fifo_str; + u32 poll_et; + + ASSERT (buf); + + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rv)) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return rv; + } + + if (session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + + if (session->state == STATE_DISCONNECT) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid (%u) has been closed by remote peer!", + vcm->my_pid, session_index); + return VPPCOM_ECONNRESET; + } + + tx_fifo = ((!session->is_cut_thru || session->is_server) ? + session->server_tx_fifo : session->server_rx_fifo); + fifo_str = ((!session->is_cut_thru || session->is_server) ? 
+ "server_tx_fifo" : "server_rx_fifo"); + q = session->vpp_event_queue; + poll_et = EPOLLET & session->vep.ev.events; + clib_spinlock_unlock (&vcm->sessions_lockp); + + do + { + n_write = svm_fifo_enqueue_nowait (tx_fifo, n, buf); + } + while (!session->is_nonblocking && (n_write <= 0)); + + /* If event wasn't set, add one */ + if (!session->is_cut_thru && (n_write > 0) && svm_fifo_set_event (tx_fifo)) + { + int rval; + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_APP_TX; + + rval = vppcom_session_at_index (session_index, &session); + if (PREDICT_FALSE (rval)) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] invalid session, sid (%u) has been closed!", + vcm->my_pid, session_index); + return rval; + } + ASSERT (q); + unix_shared_memory_queue_add (q, (u8 *) & evt, + 0 /* do wait for mutex */ ); + } + + if (poll_et && (n_write <= 0)) + { + clib_spinlock_lock (&vcm->sessions_lockp); + session->vep.et_mask |= EPOLLOUT; + clib_spinlock_unlock (&vcm->sessions_lockp); + } + + if (VPPCOM_DEBUG > 2) + { + if (n_write == -2) + clib_warning ("[%d] sid %d, FIFO-FULL %s (%p)", vcm->my_pid, + session_index, fifo_str, tx_fifo); + else + clib_warning ("[%d] sid %d, wrote %d bytes to %s (%p)", vcm->my_pid, + session_index, n_write, fifo_str, tx_fifo); + } + return (n_write < 0) ? 
VPPCOM_EAGAIN : n_write; +} + +static inline int +vppcom_session_write_ready (session_t * session, u32 session_index) +{ + vppcom_main_t *vcm = &vppcom_main; + svm_fifo_t *tx_fifo; + char *fifo_str; + int ready; + + /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ + if (session->is_vep) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] invalid session, sid (%u) is an epoll session!", + vcm->my_pid, session_index); + return VPPCOM_EBADFD; + } + + if (session->state == STATE_DISCONNECT) + { + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] sid (%u) has been closed by remote peer!", + vcm->my_pid, session_index); + return VPPCOM_ECONNRESET; + } + + tx_fifo = ((!session->is_cut_thru || session->is_server) ? + session->server_tx_fifo : session->server_rx_fifo); + fifo_str = ((!session->is_cut_thru || session->is_server) ? + "server_tx_fifo" : "server_rx_fifo"); + + ready = svm_fifo_max_enqueue (tx_fifo); + + if (VPPCOM_DEBUG > 3) + clib_warning ("[%d] sid %d, peek %s (%p), ready = %d", vcm->my_pid, + session_index, fifo_str, tx_fifo, ready); + if ((session->vep.ev.events & EPOLLET) && (ready == 0)) + session->vep.et_mask |= EPOLLOUT; + + return ready; +} + +int +vppcom_select (unsigned long n_bits, unsigned long *read_map, + unsigned long *write_map, unsigned long *except_map, + double time_to_wait) +{ + vppcom_main_t *vcm = &vppcom_main; + u32 session_index; + session_t *session = 0; + int rv, bits_set = 0; + f64 timeout = clib_time_now (&vcm->clib_time) + time_to_wait; + u32 minbits = clib_max (n_bits, BITS (uword)); + + ASSERT (sizeof (clib_bitmap_t) == sizeof (long int)); + + if (n_bits && read_map) + { + clib_bitmap_validate (vcm->rd_bitmap, minbits); + clib_memcpy (vcm->rd_bitmap, read_map, vec_len (vcm->rd_bitmap)); + memset (read_map, 0, vec_len (vcm->rd_bitmap)); + } + if (n_bits && write_map) + { + clib_bitmap_validate (vcm->wr_bitmap, minbits); + clib_memcpy (vcm->wr_bitmap, write_map, vec_len 
(vcm->wr_bitmap)); + memset (write_map, 0, vec_len (vcm->wr_bitmap)); + } + if (n_bits && except_map) + { + clib_bitmap_validate (vcm->ex_bitmap, minbits); + clib_memcpy (vcm->ex_bitmap, except_map, vec_len (vcm->ex_bitmap)); + memset (except_map, 0, vec_len (vcm->ex_bitmap)); + } + + do + { + /* *INDENT-OFF* */ + if (n_bits) + { + if (read_map) + { + clib_bitmap_foreach (session_index, vcm->rd_bitmap, + ({ + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (rv < 0) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] session %d specified in " + "read_map is closed.", vcm->my_pid, + session_index); + bits_set = VPPCOM_EBADFD; + goto select_done; + } + + rv = vppcom_session_read_ready (session, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + if (except_map && vcm->ex_bitmap && + clib_bitmap_get (vcm->ex_bitmap, session_index) && + (rv < 0)) + { + // TBD: clib_warning + clib_bitmap_set_no_check (except_map, session_index, 1); + bits_set++; + } + else if (rv > 0) + { + // TBD: clib_warning + clib_bitmap_set_no_check (read_map, session_index, 1); + bits_set++; + } + })); + } + + if (write_map) + { + clib_bitmap_foreach (session_index, vcm->wr_bitmap, + ({ + clib_spinlock_lock (&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (rv < 0) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 0) + clib_warning ("[%d] session %d specified in " + "write_map is closed.", vcm->my_pid, + session_index); + bits_set = VPPCOM_EBADFD; + goto select_done; + } + + rv = vppcom_session_write_ready (session, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + if (write_map && (rv > 0)) + { + // TBD: clib_warning + clib_bitmap_set_no_check (write_map, session_index, 1); + bits_set++; + } + })); + } + + if (except_map) + { + clib_bitmap_foreach (session_index, vcm->ex_bitmap, + ({ + clib_spinlock_lock 
(&vcm->sessions_lockp); + rv = vppcom_session_at_index (session_index, &session); + if (rv < 0) + { + clib_spinlock_unlock (&vcm->sessions_lockp); + if (VPPCOM_DEBUG > 1) + clib_warning ("[%d] session %d specified in " + "except_map is closed.", vcm->my_pid, + session_index); + bits_set = VPPCOM_EBADFD; + goto select_done; + } + + rv = vppcom_session_read_ready (session, session_index); + clib_spinlock_unlock (&vcm->sessions_lockp); + if (rv < 0) + { + // TBD: clib_warning + clib_bitmap_set_no_check (except_map, session_index, 1); + bits_set++; + } + })); + } + } + /* *INDENT-ON* */ + } + while (clib_time_now (&vcm->clib_time) < timeout); + +select_done: + return (bits_set); +} + +static inline void +vep_verify_epoll_chain (u32 vep_idx) +{ + session_t *session; + vppcom_epoll_t *vep; + int rv; + u32 sid; + + if (VPPCOM_DEBUG < 1) + return; + + /* Assumes caller has acquired spinlock: vcm->sessions_lockp */ + rv = vppcom_session_at_index (vep_idx, &session); + if (PREDICT_FALSE (rv)) + { + clib_warning ("ERROR: Invalid vep_idx (%u)!", vep_idx); + goto done; + } + if (PREDICT_FALSE (!session->is_vep)) + { + clib_warning ("ERROR: vep_idx (%u) is not a vep!", vep_idx); + goto done; + } + if (VPPCOM_DEBUG > 1) + clib_warning ("vep_idx (%u): Dumping epoll chain\n" + "{\n" + " is_vep = %u\n" + " is_vep_session = %u\n" + " wait_cont_idx = 0x%x (%u)\n" + "}\n", + vep_idx, session->is_vep, session->is_vep_session, + session->wait_cont_idx, session->wait_cont_idx); + do + { + vep = &session->vep; + sid = vep->next_sid; + if (session->is_vep_session) + { + if (VPPCOM_DEBUG > 1) + clib_warning ("vep_idx[%u]: sid 0x%x (%u)\n" + "{\n" + " next_sid = 0x%x (%u)\n" + " prev_sid = 0x%x (%u)\n" + " vep_idx = 0x%x (%u)\n" + " ev.events = 0x%x\n" + " ev.data.u64 = 0x%llx\n" + " et_mask = 0x%x\n" + "}\n", + vep_idx, sid, sid, + vep->next_sid, vep->next_sid, + vep->prev_sid, vep->prev_sid, + vep->vep_idx, vep->vep_idx, + vep->ev.events, vep->ev.data.u64, vep->et_mask); + } + if (sid != 
~0)
+        {
+          rv = vppcom_session_at_index (sid, &session);
+          if (PREDICT_FALSE (rv))
+            {
+              clib_warning ("ERROR: Invalid sid (%u)!", sid);
+              goto done;
+            }
+          if (PREDICT_FALSE (session->is_vep))
+            clib_warning ("ERROR: sid (%u) is a vep!", vep_idx);
+          else if (PREDICT_FALSE (!session->is_vep_session))
+            {
+              clib_warning ("ERROR: session (%u) is not a vep session!", sid);
+              goto done;
+            }
+          if (PREDICT_FALSE (session->vep.vep_idx != vep_idx))
+            clib_warning ("ERROR: session (%u) vep_idx (%u) != "
+                          "vep_idx (%u)!",
+                          sid, session->vep.vep_idx, vep_idx);
+        }
+    }
+  while (sid != ~0);
+
+done:
+  if (VPPCOM_DEBUG > 1)
+    clib_warning ("vep_idx (%u): Dump complete!", vep_idx);
+}
+
+/*
+ * Allocate a session from the session pool and initialise it as a vep
+ * (epoll instance) whose chain of watched sessions is empty.
+ *
+ * Returns the pool index of the new vep.
+ */
+int
+vppcom_epoll_create (void)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *vep_session;
+  u32 vep_idx;
+
+  clib_spinlock_lock (&vcm->sessions_lockp);
+  pool_get (vcm->sessions, vep_session);
+  memset (vep_session, 0, sizeof (*vep_session));
+  vep_idx = vep_session - vcm->sessions;
+
+  vep_session->is_vep = 1;
+  vep_session->vep.vep_idx = ~0;
+  vep_session->vep.next_sid = ~0;
+  vep_session->vep.prev_sid = ~0;
+  vep_session->wait_cont_idx = ~0;
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
+  if (VPPCOM_DEBUG > 0)
+    clib_warning ("Created vep_idx %u!", vep_idx);
+
+  return (vep_idx);
+}
+
+/*
+ * Add (EPOLL_CTL_ADD), modify (EPOLL_CTL_MOD) or remove (EPOLL_CTL_DEL)
+ * session_index on the chain of sessions watched by vep_idx.
+ *
+ * The chain is doubly linked through vep.next_sid / vep.prev_sid; the vep
+ * itself is the head, so the first session's prev_sid is vep_idx and new
+ * sessions are inserted right behind the head.
+ *
+ * event is required for ADD and MOD and ignored for DEL.
+ *
+ * Returns VPPCOM_OK on success, VPPCOM_EINVAL on a bad argument, or the
+ * error reported by vppcom_session_at_index().  The whole operation runs
+ * with sessions_lockp held.
+ */
+int
+vppcom_epoll_ctl (uint32_t vep_idx, int op, uint32_t session_index,
+                  struct epoll_event *event)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *vep_session;
+  session_t *session;
+  int rv;
+
+  if (vep_idx == session_index)
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: vep_idx == session_index (%u)!", vep_idx);
+      return VPPCOM_EINVAL;
+    }
+
+  clib_spinlock_lock (&vcm->sessions_lockp);
+  rv = vppcom_session_at_index (vep_idx, &vep_session);
+  if (PREDICT_FALSE (rv))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid vep_idx (%u)!", vep_idx);
+      goto done;
+    }
+  if (PREDICT_FALSE (!vep_session->is_vep))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: vep_idx (%u) is not a vep!", vep_idx);
+      rv = VPPCOM_EINVAL;
+      goto done;
+    }
+
+  ASSERT (vep_session->vep.vep_idx == ~0);
+  ASSERT (vep_session->vep.prev_sid == ~0);
+
+  rv = vppcom_session_at_index (session_index, &session);
+  if (PREDICT_FALSE (rv))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid session_index (%u)!", session_index);
+      goto done;
+    }
+  if (PREDICT_FALSE (session->is_vep))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: session_index (%u) is a vep!", session_index);
+      rv = VPPCOM_EINVAL;
+      goto done;
+    }
+
+  switch (op)
+    {
+    case EPOLL_CTL_ADD:
+      if (PREDICT_FALSE (!event))
+        {
+          clib_warning ("NULL pointer to epoll_event structure!");
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      if (vep_session->vep.next_sid != ~0)
+        {
+          session_t *next_session;
+          rv = vppcom_session_at_index (vep_session->vep.next_sid,
+                                        &next_session);
+          if (PREDICT_FALSE (rv))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("EPOLL_CTL_ADD: Invalid vep.next_sid (%u) on"
+                              " vep_idx (%u)!", vep_session->vep.next_sid,
+                              vep_idx);
+              goto done;
+            }
+          ASSERT (next_session->vep.prev_sid == vep_idx);
+          next_session->vep.prev_sid = session_index;
+        }
+      session->vep.next_sid = vep_session->vep.next_sid;
+      session->vep.prev_sid = vep_idx;
+      session->vep.vep_idx = vep_idx;
+      session->vep.et_mask = VEP_DEFAULT_ET_MASK;
+      session->vep.ev = *event;
+      session->is_vep_session = 1;
+      vep_session->vep.next_sid = session_index;
+      if (VPPCOM_DEBUG > 1)
+        clib_warning ("EPOLL_CTL_ADD: vep_idx %u, sid %u, events 0x%x,"
+                      " data 0x%llx!", vep_idx, session_index,
+                      event->events, event->data.u64);
+      break;
+
+    case EPOLL_CTL_MOD:
+      if (PREDICT_FALSE (!event))
+        {
+          clib_warning ("NULL pointer to epoll_event structure!");
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      /* Reject the request if EITHER condition fails: the session must be
+       * a vep session and it must belong to this vep. */
+      if (PREDICT_FALSE (!session->is_vep_session ||
+                         (session->vep.vep_idx != vep_idx)))
+        {
+          if (VPPCOM_DEBUG > 0)
+            {
+              if (!session->is_vep_session)
+                clib_warning ("EPOLL_CTL_MOD: session (%u) is not "
+                              "a vep session!", session_index);
+              else
+                clib_warning ("EPOLL_CTL_MOD: session (%u) vep_idx (%u) != "
+                              "vep_idx (%u)!", session_index,
+                              session->vep.vep_idx, vep_idx);
+            }
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+      session->vep.et_mask = VEP_DEFAULT_ET_MASK;
+      session->vep.ev = *event;
+      if (VPPCOM_DEBUG > 1)
+        clib_warning ("EPOLL_CTL_MOD: vep_idx %u, sid %u, events 0x%x,"
+                      " data 0x%llx!", vep_idx, session_index,
+                      event->events, event->data.u64);
+      break;
+
+    case EPOLL_CTL_DEL:
+      /* Same membership check as EPOLL_CTL_MOD; unlinking a session that
+       * is not on this vep's chain would corrupt the chain. */
+      if (PREDICT_FALSE (!session->is_vep_session ||
+                         (session->vep.vep_idx != vep_idx)))
+        {
+          if (VPPCOM_DEBUG > 0)
+            {
+              if (!session->is_vep_session)
+                clib_warning ("EPOLL_CTL_DEL: session (%u) is not "
+                              "a vep session!", session_index);
+              else
+                clib_warning ("EPOLL_CTL_DEL: session (%u) vep_idx (%u) != "
+                              "vep_idx (%u)!", session_index,
+                              session->vep.vep_idx, vep_idx);
+            }
+          rv = VPPCOM_EINVAL;
+          goto done;
+        }
+
+      /* If vppcom_epoll_wait() was going to resume at this session, make
+       * it resume at the session's successor instead. */
+      vep_session->wait_cont_idx =
+        (vep_session->wait_cont_idx == session_index) ?
+        session->vep.next_sid : vep_session->wait_cont_idx;
+
+      if (session->vep.prev_sid == vep_idx)
+        vep_session->vep.next_sid = session->vep.next_sid;
+      else
+        {
+          session_t *prev_session;
+          rv = vppcom_session_at_index (session->vep.prev_sid, &prev_session);
+          if (PREDICT_FALSE (rv))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("EPOLL_CTL_DEL: Invalid vep.prev_sid (%u) on"
+                              " sid (%u)!", session->vep.prev_sid,
+                              session_index);
+              goto done;
+            }
+          ASSERT (prev_session->vep.next_sid == session_index);
+          prev_session->vep.next_sid = session->vep.next_sid;
+        }
+      if (session->vep.next_sid != ~0)
+        {
+          session_t *next_session;
+          rv = vppcom_session_at_index (session->vep.next_sid, &next_session);
+          if (PREDICT_FALSE (rv))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("EPOLL_CTL_DEL: Invalid vep.next_sid (%u) on"
+                              " sid (%u)!", session->vep.next_sid,
+                              session_index);
+              goto done;
+            }
+          ASSERT (next_session->vep.prev_sid == session_index);
+          next_session->vep.prev_sid = session->vep.prev_sid;
+        }
+
+      memset (&session->vep, 0, sizeof (session->vep));
+      session->vep.next_sid = ~0;
+      session->vep.prev_sid = ~0;
+      session->vep.vep_idx = ~0;
+      session->is_vep_session = 0;
+      if (VPPCOM_DEBUG > 1)
+        clib_warning ("EPOLL_CTL_DEL: vep_idx %u, sid %u!", vep_idx,
+                      session_index);
+      break;
+
+    default:
+      clib_warning ("Invalid operation (%d)!", op);
+      rv = VPPCOM_EINVAL;
+    }
+
+  vep_verify_epoll_chain (vep_idx);
+
+done:
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+  return rv;
+}
+
+/*
+ * Take sessions_lockp and look up session index I, storing the session
+ * pointer through S.
+ *
+ * On success the lock is left HELD and the caller must release it.
+ * On failure the lock is released, the error is left in `rv' and control
+ * jumps to the caller's `done' label -- so code at `done' must not unlock
+ * sessions_lockp again.
+ *
+ * The enclosing function must declare `int rv' and a `done' label.
+ */
+#define VCL_LOCK_AND_GET_SESSION(I, S)                          \
+do {                                                            \
+  vppcom_main_t *vcm = &vppcom_main;                            \
+                                                                \
+  clib_spinlock_lock (&vcm->sessions_lockp);                    \
+  rv = vppcom_session_at_index (I, S);                          \
+  if (PREDICT_FALSE (rv))                                       \
+    {                                                           \
+      clib_spinlock_unlock (&vcm->sessions_lockp);              \
+                                                                \
+      if (VPPCOM_DEBUG > 0)                                     \
+        clib_warning ("ERROR: Invalid " #I " (%u)!", I);        \
+                                                                \
+      goto done;                                                \
+    }                                                           \
+} while (0)
+
+/*
+ * Poll the sessions chained on vep_idx and report the ready ones in
+ * events[] (at most maxevents entries).
+ *
+ * The chain is scanned repeatedly until at least one event was collected
+ * or wait_for_time (added to clib_time_now() on entry) has elapsed.  When
+ * events[] fills up, the next session to visit is saved in the vep's
+ * wait_cont_idx so that the following call resumes from there.
+ *
+ * sessions_lockp is only held while a session is being read or updated;
+ * per-session state is copied into locals before the lock is dropped.
+ *
+ * Returns the number of entries written to events[] (0 if none), or a
+ * negative vppcom error.
+ */
+int
+vppcom_epoll_wait (uint32_t vep_idx, struct epoll_event *events,
+                   int maxevents, double wait_for_time)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *vep_session;
+  int rv;
+  f64 timeout = clib_time_now (&vcm->clib_time) + wait_for_time;
+  int num_ev = 0;
+  u32 vep_next_sid, wait_cont_idx;
+  u8 is_vep;
+
+  if (PREDICT_FALSE (maxevents <= 0))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid maxevents (%d)!", maxevents);
+      return VPPCOM_EINVAL;
+    }
+  if (PREDICT_FALSE (wait_for_time < 0))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: Invalid wait_for_time (%f)!", wait_for_time);
+      return VPPCOM_EINVAL;
+    }
+  /* events[] is written unconditionally below; refuse a NULL array. */
+  if (PREDICT_FALSE (!events))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: NULL pointer to epoll_event array!");
+      return VPPCOM_EINVAL;
+    }
+  memset (events, 0, sizeof (*events) * maxevents);
+
+  VCL_LOCK_AND_GET_SESSION (vep_idx, &vep_session);
+  vep_next_sid = vep_session->vep.next_sid;
+  is_vep = vep_session->is_vep;
+  wait_cont_idx = vep_session->wait_cont_idx;
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
+  if (PREDICT_FALSE (!is_vep))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("ERROR: vep_idx (%u) is not a vep!", vep_idx);
+      rv = VPPCOM_EINVAL;
+      goto done;
+    }
+  /* Nothing to poll: return 0 events.  Only the warning depends on the
+   * debug level, never the control flow. */
+  if (PREDICT_FALSE (vep_next_sid == ~0))
+    {
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("WARNING: vep_idx (%u) is empty!", vep_idx);
+      goto done;
+    }
+
+  do
+    {
+      u32 sid;
+      u32 next_sid = ~0;
+      session_t *session;
+
+      for (sid = (wait_cont_idx == ~0) ? vep_next_sid : wait_cont_idx;
+           sid != ~0; sid = next_sid)
+        {
+          u32 session_events, et_mask, clear_et_mask, session_vep_idx;
+          u8 add_event, is_vep_session;
+          int ready;
+          u64 session_ev_data;
+
+          /* Snapshot the session under the lock; `session' must not be
+           * dereferenced once the lock has been dropped. */
+          VCL_LOCK_AND_GET_SESSION (sid, &session);
+          next_sid = session->vep.next_sid;
+          session_events = session->vep.ev.events;
+          et_mask = session->vep.et_mask;
+          is_vep = session->is_vep;
+          is_vep_session = session->is_vep_session;
+          session_vep_idx = session->vep.vep_idx;
+          session_ev_data = session->vep.ev.data.u64;
+          clib_spinlock_unlock (&vcm->sessions_lockp);
+
+          if (PREDICT_FALSE (is_vep))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("ERROR: sid (%u) is a vep!", sid);
+              rv = VPPCOM_EINVAL;
+              goto done;
+            }
+          if (PREDICT_FALSE (!is_vep_session))
+            {
+              if (VPPCOM_DEBUG > 0)
+                clib_warning ("ERROR: session (%u) is not "
+                              "a vep session!", sid);
+              rv = VPPCOM_EINVAL;
+              goto done;
+            }
+          if (PREDICT_FALSE (session_vep_idx != vep_idx))
+            {
+              clib_warning ("ERROR: session (%u) "
+                            "vep_idx (%u) != vep_idx (%u)!",
+                            sid, session_vep_idx, vep_idx);
+              rv = VPPCOM_EINVAL;
+              goto done;
+            }
+
+          add_event = clear_et_mask = 0;
+
+          if ((EPOLLIN & session_events) && (EPOLLIN & et_mask))
+            {
+              VCL_LOCK_AND_GET_SESSION (sid, &session);
+              ready = vppcom_session_read_ready (session, sid);
+              clib_spinlock_unlock (&vcm->sessions_lockp);
+              if (ready > 0)
+                {
+                  add_event = 1;
+                  events[num_ev].events |= EPOLLIN;
+                  if (EPOLLET & session_events)
+                    clear_et_mask |= EPOLLIN;
+                }
+              else if (ready < 0)
+                {
+                  add_event = 1;
+                  switch (ready)
+                    {
+                    case VPPCOM_ECONNRESET:
+                      events[num_ev].events |= EPOLLHUP | EPOLLRDHUP;
+                      break;
+
+                    default:
+                      events[num_ev].events |= EPOLLERR;
+                      break;
+                    }
+                }
+            }
+
+          if ((EPOLLOUT & session_events) && (EPOLLOUT & et_mask))
+            {
+              VCL_LOCK_AND_GET_SESSION (sid, &session);
+              ready = vppcom_session_write_ready (session, sid);
+              clib_spinlock_unlock (&vcm->sessions_lockp);
+              if (ready > 0)
+                {
+                  add_event = 1;
+                  events[num_ev].events |= EPOLLOUT;
+                  if (EPOLLET & session_events)
+                    clear_et_mask |= EPOLLOUT;
+                }
+              else if (ready < 0)
+                {
+                  add_event = 1;
+                  switch (ready)
+                    {
+                    case VPPCOM_ECONNRESET:
+                      events[num_ev].events |= EPOLLHUP;
+                      break;
+
+                    default:
+                      events[num_ev].events |= EPOLLERR;
+                      break;
+                    }
+                }
+            }
+
+          if (add_event)
+            {
+              events[num_ev].data.u64 = session_ev_data;
+              if (EPOLLONESHOT & session_events)
+                {
+                  /* One-shot: clear the session's event mask so it is
+                   * not reported again. */
+                  VCL_LOCK_AND_GET_SESSION (sid, &session);
+                  session->vep.ev.events = 0;
+                  clib_spinlock_unlock (&vcm->sessions_lockp);
+                }
+              num_ev++;
+              if (num_ev == maxevents)
+                {
+                  /* events[] is full: remember where to resume. */
+                  VCL_LOCK_AND_GET_SESSION (vep_idx, &vep_session);
+                  vep_session->wait_cont_idx = next_sid;
+                  clib_spinlock_unlock (&vcm->sessions_lockp);
+                  goto done;
+                }
+            }
+          /* The scan started mid-chain at wait_cont_idx: wrap around to
+           * the head at the end of the chain and stop once the starting
+           * session is reached again. */
+          if (wait_cont_idx != ~0)
+            {
+              if (next_sid == ~0)
+                next_sid = vep_next_sid;
+              else if (next_sid == wait_cont_idx)
+                next_sid = ~0;
+            }
+        }
+    }
+  while ((num_ev == 0) && (clib_time_now (&vcm->clib_time) <= timeout));
+
+  if (wait_cont_idx != ~0)
+    {
+      VCL_LOCK_AND_GET_SESSION (vep_idx, &vep_session);
+      vep_session->wait_cont_idx = ~0;
+      clib_spinlock_unlock (&vcm->sessions_lockp);
+    }
+done:
+  return (rv != VPPCOM_OK) ? rv : num_ev;
+}
+
+/*
+ * Get or set an attribute of session_index, selected by op (one of the
+ * VPPCOM_ATTR_* values).
+ *
+ * buffer / buflen carry the attribute value: a u32 of O_* flags for
+ * GET_FLAGS / SET_FLAGS, a vppcom_endpt_t for GET_PEER_ADDR /
+ * GET_LCL_ADDR.  The GET operations set *buflen to the size written.
+ * VPPCOM_ATTR_PEEK_NREAD and the SET_REUSEADDR .. SET_TCP_KEEPINTVL
+ * operations are accepted but currently do nothing.
+ *
+ * Returns VPPCOM_OK, the value of vppcom_session_read_ready() for
+ * VPPCOM_ATTR_GET_NREAD, VPPCOM_EINVAL for an unknown op or an
+ * undersized/missing buffer, or the session lookup error.
+ */
+int
+vppcom_session_attr (uint32_t session_index, uint32_t op,
+                     void *buffer, uint32_t * buflen)
+{
+  vppcom_main_t *vcm = &vppcom_main;
+  session_t *session;
+  int rv = VPPCOM_OK;
+  u32 *flags = buffer;
+  vppcom_endpt_t *ep = buffer;
+
+  VCL_LOCK_AND_GET_SESSION (session_index, &session);
+  switch (op)
+    {
+    case VPPCOM_ATTR_GET_NREAD:
+      rv = vppcom_session_read_ready (session, session_index);
+      if (VPPCOM_DEBUG > 0)
+        clib_warning ("VPPCOM_ATTR_GET_NREAD: nread = %d", rv);
+
+      break;
+
+    case VPPCOM_ATTR_PEEK_NREAD:
+      /* TBD */
+      break;
+
+    case VPPCOM_ATTR_GET_FLAGS:
+      if (buffer && buflen && (*buflen >= sizeof (*flags)))
+        {
+          *flags = O_RDWR | ((session->is_nonblocking) ? O_NONBLOCK : 0);
+          *buflen = sizeof (*flags);
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_GET_FLAGS: flags = 0x%08x, "
+                          "is_nonblocking = %u", *flags,
+                          session->is_nonblocking);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_SET_FLAGS:
+      if (buffer && buflen && (*buflen >= sizeof (*flags)))
+        {
+          session->is_nonblocking = (*flags & O_NONBLOCK) ? 1 : 0;
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_SET_FLAGS: flags = 0x%08x, "
+                          "is_nonblocking = %u", *flags,
+                          session->is_nonblocking);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_GET_PEER_ADDR:
+      if (buffer && buflen && (*buflen >= sizeof (*ep)))
+        {
+          ep->vrf = session->vrf;
+          ep->is_ip4 = session->peer_addr.is_ip4;
+          ep->port = session->port;
+          if (session->peer_addr.is_ip4)
+            clib_memcpy (ep->ip, &session->peer_addr.ip46.ip4,
+                         sizeof (ip4_address_t));
+          else
+            clib_memcpy (ep->ip, &session->peer_addr.ip46.ip6,
+                         sizeof (ip6_address_t));
+          *buflen = sizeof (*ep);
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_GET_PEER_ADDR: is_ip4 = %u, "
+                          "addr = %U", ep->is_ip4, format_ip46_address,
+                          &session->peer_addr.ip46, ep->is_ip4);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_GET_LCL_ADDR:
+      if (buffer && buflen && (*buflen >= sizeof (*ep)))
+        {
+          ep->vrf = session->vrf;
+          ep->is_ip4 = session->lcl_addr.is_ip4;
+          ep->port = session->port;
+          if (session->lcl_addr.is_ip4)
+            clib_memcpy (ep->ip, &session->lcl_addr.ip46.ip4,
+                         sizeof (ip4_address_t));
+          else
+            clib_memcpy (ep->ip, &session->lcl_addr.ip46.ip6,
+                         sizeof (ip6_address_t));
+          *buflen = sizeof (*ep);
+          if (VPPCOM_DEBUG > 0)
+            clib_warning ("VPPCOM_ATTR_GET_LCL_ADDR: is_ip4 = %u, "
+                          "addr = %U", ep->is_ip4, format_ip46_address,
+                          &session->lcl_addr.ip46, ep->is_ip4);
+        }
+      else
+        rv = VPPCOM_EINVAL;
+      break;
+
+    case VPPCOM_ATTR_SET_REUSEADDR:
+      break;
+
+    case VPPCOM_ATTR_SET_BROADCAST:
+      break;
+
+    case VPPCOM_ATTR_SET_V6ONLY:
+      break;
+
+    case VPPCOM_ATTR_SET_KEEPALIVE:
+      break;
+
+    case VPPCOM_ATTR_SET_TCP_KEEPIDLE:
+      break;
+
+    case VPPCOM_ATTR_SET_TCP_KEEPINTVL:
+      break;
+
+    default:
+      rv = VPPCOM_EINVAL;
+      break;
+    }
+
+  clib_spinlock_unlock (&vcm->sessions_lockp);
+
+  /* VCL_LOCK_AND_GET_SESSION has already dropped the lock when it jumps
+   * here, so `done' sits after the unlock above. */
+done:
+  return rv;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git
a/src/vcl/vppcom.h b/src/vcl/vppcom.h
new file mode 100644
index 00000000000..dd72986ec53
--- /dev/null
+++ b/src/vcl/vppcom.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vppcom_h
+#define included_vppcom_h
+
+#include <netdb.h>
+#include <errno.h>
+#include <sys/epoll.h>
+
+/*
+ * VPPCOM Public API Definitions, Enums, and Data Structures
+ */
+#define INVALID_SESSION_ID                  (~0)
+#define VPPCOM_VRF_DEFAULT                  0
+#define VPPCOM_CONF_ENV                     "VPPCOM_CONF"
+#define VPPCOM_CONF_DEFAULT                 "/etc/vpp/vppcom.conf"
+
+typedef enum
+{
+  VPPCOM_PROTO_TCP = 0,
+  VPPCOM_PROTO_UDP,
+} vppcom_proto_t;
+
+typedef enum
+{
+  VPPCOM_IS_IP6 = 0,
+  VPPCOM_IS_IP4,
+} vppcom_is_ip4_t;
+
+typedef struct vppcom_endpt_t_
+{
+  uint32_t vrf;
+  uint8_t is_cut_thru;
+  uint8_t is_ip4;
+  uint8_t *ip;
+  uint16_t port;
+} vppcom_endpt_t;
+
+typedef enum
+{
+  VPPCOM_OK = 0,
+  VPPCOM_EAGAIN = -EAGAIN,
+  VPPCOM_EINVAL = -EINVAL,
+  VPPCOM_EBADFD = -EBADFD,
+  VPPCOM_EAFNOSUPPORT = -EAFNOSUPPORT,
+  VPPCOM_ECONNRESET = -ECONNRESET,
+  VPPCOM_ECONNREFUSED = -ECONNREFUSED,
+  VPPCOM_ETIMEDOUT = -ETIMEDOUT,
+} vppcom_error_t;
+
+typedef enum
+{
+  VPPCOM_ATTR_GET_NREAD,
+  VPPCOM_ATTR_PEEK_NREAD,
+  VPPCOM_ATTR_GET_FLAGS,
+  VPPCOM_ATTR_SET_FLAGS,
+  VPPCOM_ATTR_GET_LCL_ADDR,
+  VPPCOM_ATTR_GET_PEER_ADDR,
+  VPPCOM_ATTR_SET_REUSEADDR,
+  VPPCOM_ATTR_SET_BROADCAST,
+  VPPCOM_ATTR_SET_V6ONLY,
+  VPPCOM_ATTR_SET_KEEPALIVE,
+  VPPCOM_ATTR_SET_TCP_KEEPIDLE,
+  VPPCOM_ATTR_SET_TCP_KEEPINTVL,
+} vppcom_attr_op_t;
+
+/*
+ * VPPCOM Public API Functions
+ */
+static inline const char *
+vppcom_retval_str (int retval)	/* name of a vppcom_error_t value */
+{
+  const char *st;		/* only ever points at a string literal */
+
+  switch (retval)
+    {
+    case VPPCOM_OK:
+      st = "VPPCOM_OK";
+      break;
+
+    case VPPCOM_EAGAIN:
+      st = "VPPCOM_EAGAIN";
+      break;
+
+    case VPPCOM_EINVAL:
+      st = "VPPCOM_EINVAL";
+      break;
+
+    case VPPCOM_EBADFD:
+      st = "VPPCOM_EBADFD";
+      break;
+
+    case VPPCOM_EAFNOSUPPORT:
+      st = "VPPCOM_EAFNOSUPPORT";
+      break;
+
+    case VPPCOM_ECONNRESET:
+      st = "VPPCOM_ECONNRESET";
+      break;
+
+    case VPPCOM_ECONNREFUSED:
+      st = "VPPCOM_ECONNREFUSED";
+      break;
+
+    case VPPCOM_ETIMEDOUT:
+      st = "VPPCOM_ETIMEDOUT";
+      break;
+
+    default:			/* any value not listed in vppcom_error_t */
+      st = "UNKNOWN_STATE";
+      break;
+    }
+
+  return st;
+}
+
+/* TBD: make these constructor/destructor function */
+extern int vppcom_app_create (char *app_name);
+extern void vppcom_app_destroy (void);
+
+extern int vppcom_session_create (uint32_t vrf, uint8_t proto,
+                                  uint8_t is_nonblocking);
+extern int vppcom_session_close (uint32_t session_index);
+
+extern int vppcom_session_bind (uint32_t session_index, vppcom_endpt_t * ep);
+extern int vppcom_session_listen (uint32_t session_index, uint32_t q_len);
+extern int vppcom_session_accept (uint32_t session_index,
+                                  vppcom_endpt_t * client_ep,
+                                  double wait_for_time);
+
+extern int vppcom_session_connect (uint32_t session_index,
+                                   vppcom_endpt_t * server_ep);
+extern int vppcom_session_read (uint32_t session_index, void *buf, int n);
+extern int vppcom_session_write (uint32_t session_index, void *buf, int n);
+
+extern int vppcom_select (unsigned long n_bits,
+                          unsigned long *read_map,
+                          unsigned long *write_map,
+                          unsigned long *except_map, double wait_for_time);
+
+extern int vppcom_epoll_create (void);
+extern int vppcom_epoll_ctl (uint32_t vep_idx, int op,
+                             uint32_t session_index,
+                             struct epoll_event *event);
+extern int vppcom_epoll_wait (uint32_t vep_idx, struct epoll_event *events,
+                              int maxevents, double wait_for_time);
+extern int vppcom_session_attr (uint32_t session_index, uint32_t op,
+                                void *buffer, uint32_t * buflen);
+
+#endif /* included_vppcom_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vppcom_test.conf b/src/vcl/vppcom_test.conf
new file mode 100644
index 00000000000..e5ac46363f8
--- /dev/null
+++ b/src/vcl/vppcom_test.conf
@@ -0,0 +1,25 @@
+# Test VPPCOM config file
+vppcom {
+  heapsize 1
+  api-prefix daw  # this is a comment
+  uid 1020 this is also a comment.
+  gid 1020
+# This is yet another comment!
+  segment-baseva 0x300000000
+  segment-size 0x10000000
+  segment-size 268435456
+  add-segment-size 0x1000000
+  add-segment-size 134217728
+  preallocated-fifo-pairs 16
+  rx-fifo-size 3145728
+  rx-fifo-size 0x300000
+  tx-fifo-size 3145728
+  tx-fifo-size 0x300000
+  event-queue-size 1024
+  event-queue-size 0x400
+  listen-queue-size 32
+  listen-queue-size 0x20
+  app-timeout 54.3
+  session-timeout 66.6
+  accept-timeout 0.1
+}
|