aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDave Wallace <dwallacelf@gmail.com>2017-10-24 04:12:18 -0400
committerKeith Burns <alagalah@gmail.com>2017-10-24 18:25:31 +0000
commit5c7cf1cc5358d137160be1619981e7eea9a7402f (patch)
treee981f6fc831e9c4e240e765d506ac8d58be43123 /src
parent4c151b5fa13312639b0a5ef0d0efb1866bf4ed5e (diff)
VCL-LDPRELOAD: statically link vppcom into libvcl-ldpreload.so
- Move VCL & VCL-LDPRELOAD source into src/vcl - Statically link vppcom into libvcl-ldpreload.so Change-Id: I778300b37e8b06640d9dbc01caf297edf7a6edb7 Signed-off-by: Dave Wallace <dwallacelf@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am1
-rw-r--r--src/uri.am40
-rw-r--r--src/vcl.am65
-rw-r--r--src/vcl/sock_test.h (renamed from src/uri/sock_test.h)0
-rw-r--r--src/vcl/sock_test_client.c (renamed from src/uri/sock_test_client.c)2
-rw-r--r--src/vcl/sock_test_server.c (renamed from src/uri/sock_test_server.c)2
-rw-r--r--src/vcl/vcl_test_client.c (renamed from src/uri/vcl_test_client.c)4
-rw-r--r--src/vcl/vcl_test_server.c (renamed from src/uri/vcl_test_server.c)4
-rw-r--r--src/vcl/vcom.c3334
-rw-r--r--src/vcl/vcom.h204
-rw-r--r--src/vcl/vcom_glibc_socket.h351
-rw-r--r--src/vcl/vcom_socket.c3443
-rw-r--r--src/vcl/vcom_socket.h471
-rw-r--r--src/vcl/vcom_socket_wrapper.c906
-rw-r--r--src/vcl/vcom_socket_wrapper.h235
-rw-r--r--src/vcl/vppcom.c (renamed from src/uri/vppcom.c)2
-rw-r--r--src/vcl/vppcom.h (renamed from src/uri/vppcom.h)0
-rw-r--r--src/vcl/vppcom_test.conf (renamed from src/uri/vppcom_test.conf)0
18 files changed, 9020 insertions, 44 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 7b35e50c37a..c1b6b0e9d89 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -68,6 +68,7 @@ include vnet.am
include vpp.am
include vpp-api-test.am
include uri.am
+include vcl.am
SUBDIRS += plugins
diff --git a/src/uri.am b/src/uri.am
index 660f897dd71..176eac88859 100644
--- a/src/uri.am
+++ b/src/uri.am
@@ -11,34 +11,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-lib_LTLIBRARIES += libvppcom.la
-
-libvppcom_la_SOURCES =
-libvppcom_la_DEPENDENCIES = \
- libvppinfra.la \
- libvlib.la \
- libsvmdb.la \
- libsvm.la \
- libvlibmemory.la \
- libvlibmemoryclient.la
-
-libvppcom_la_LIBADD = $(libvppcom_la_DEPENDENCIES) -lpthread
-
-libvppcom_la_SOURCES += \
- uri/vppcom.c
-
-nobase_include_HEADERS += \
- uri/vppcom.h
-
noinst_PROGRAMS += \
uri_udp_test \
uri_tcp_test \
uri_socket_test \
- uri_socket_server \
- vcl_test_server \
- vcl_test_client \
- sock_test_server \
- sock_test_client
+ uri_socket_server
uri_udp_test_SOURCES = uri/uri_udp_test.c
uri_udp_test_LDADD = libvlibmemoryclient.la libsvm.la \
@@ -49,19 +26,8 @@ uri_tcp_test_LDADD = libvlibmemoryclient.la libsvm.la \
libvppinfra.la -lpthread -lm -lrt
uri_socket_test_SOURCES = uri/uri_socket_test.c
-uri_socket_test_LDADD = libvppinfra.la -lpthread -lm -lrt
+uri_socket_test_LDADD = libvppinfra.la libvcl_ldpreload.la -lpthread -lm -lrt
uri_socket_server_SOURCES = uri/uri_socket_server.c
-uri_socket_server_LDADD = libvppinfra.la -lpthread -lm -lrt
-
-vcl_test_server_SOURCES = uri/vcl_test_server.c
-vcl_test_server_LDADD = libvppcom.la
-
-vcl_test_client_SOURCES = uri/vcl_test_client.c
-vcl_test_client_LDADD = libvppcom.la
-
-sock_test_server_SOURCES = uri/sock_test_server.c
-sock_test_client_SOURCES = uri/sock_test_client.c
+uri_socket_server_LDADD = libvppinfra.la libvcl_ldpreload.la -lpthread -lm -lrt
-nobase_include_HEADERS += \
- uri/sock_test.h
diff --git a/src/vcl.am b/src/vcl.am
new file mode 100644
index 00000000000..de8b4da7e27
--- /dev/null
+++ b/src/vcl.am
@@ -0,0 +1,65 @@
+# Copyright (c) 2017 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+lib_LTLIBRARIES += libvppcom.la libvcl_ldpreload.la
+
+libvppcom_la_SOURCES =
+libvcl_ldpreload_la_SOURCES =
+libvppcom_la_DEPENDENCIES = \
+ libvppinfra.la \
+ libvlib.la \
+ libsvmdb.la \
+ libsvm.la \
+ libvlibmemory.la \
+ libvlibmemoryclient.la
+
+libvcl_ldpreload_la_DEPENDENCIES = $(libvppcom_la_DEPENDENCIES)
+
+libvppcom_la_LIBADD = $(libvppcom_la_DEPENDENCIES) -lpthread
+libvcl_ldpreload_la_LIBADD = $(libvcl_ldpreload_la_DEPENDENCIES) -lpthread
+
+libvppcom_la_SOURCES += \
+ vcl/vppcom.c
+
+nobase_include_HEADERS += \
+ vcl/vppcom.h
+
+libvcl_ldpreload_la_SOURCES += \
+ vcl/vcom_socket_wrapper.c \
+ vcl/vcom.c \
+ vcl/vcom_socket.c \
+ vcl/vppcom.c
+
+nobase_include_HEADERS += \
+ vcl/vcom_socket_wrapper.h \
+ vcl/vcom_glibc_socket.h \
+ vcl/vcom.h \
+ vcl/vcom_socket.h
+
+noinst_PROGRAMS += \
+ vcl_test_server \
+ vcl_test_client \
+ sock_test_server \
+ sock_test_client
+
+vcl_test_server_SOURCES = vcl/vcl_test_server.c
+vcl_test_server_LDADD = libvppcom.la
+
+vcl_test_client_SOURCES = vcl/vcl_test_client.c
+vcl_test_client_LDADD = libvppcom.la
+
+sock_test_server_SOURCES = vcl/sock_test_server.c
+sock_test_client_SOURCES = vcl/sock_test_client.c
+
+nobase_include_HEADERS += \
+ vcl/sock_test.h
diff --git a/src/uri/sock_test.h b/src/vcl/sock_test.h
index 281ba6fd473..281ba6fd473 100644
--- a/src/uri/sock_test.h
+++ b/src/vcl/sock_test.h
diff --git a/src/uri/sock_test_client.c b/src/vcl/sock_test_client.c
index b53f8f2230f..40ba043adae 100644
--- a/src/uri/sock_test_client.c
+++ b/src/vcl/sock_test_client.c
@@ -22,7 +22,7 @@
#include <stdio.h>
#include <time.h>
#include <arpa/inet.h>
-#include <uri/sock_test.h>
+#include <vcl/sock_test.h>
typedef struct
{
diff --git a/src/uri/sock_test_server.c b/src/vcl/sock_test_server.c
index 79e762def62..753a7da790c 100644
--- a/src/uri/sock_test_server.c
+++ b/src/vcl/sock_test_server.c
@@ -21,7 +21,7 @@
#include <string.h>
#include <time.h>
#include <ctype.h>
-#include <uri/sock_test.h>
+#include <vcl/sock_test.h>
#include <sys/stat.h>
#include <fcntl.h>
diff --git a/src/uri/vcl_test_client.c b/src/vcl/vcl_test_client.c
index 7ab8824fb15..e1a4c6b74d4 100644
--- a/src/uri/vcl_test_client.c
+++ b/src/vcl/vcl_test_client.c
@@ -15,8 +15,8 @@
#define VCL_TEST
-#include <uri/vppcom.h>
-#include <uri/sock_test_client.c>
+#include <vcl/vppcom.h>
+#include <vcl/sock_test_client.c>
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/uri/vcl_test_server.c b/src/vcl/vcl_test_server.c
index 660d705e037..e91d2ecd7c8 100644
--- a/src/uri/vcl_test_server.c
+++ b/src/vcl/vcl_test_server.c
@@ -15,8 +15,8 @@
#define VCL_TEST
-#include <uri/vppcom.h>
-#include <uri/sock_test_server.c>
+#include <vcl/vppcom.h>
+#include <vcl/sock_test_server.c>
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vcl/vcom.c b/src/vcl/vcom.c
new file mode 100644
index 00000000000..6d98fdb75c0
--- /dev/null
+++ b/src/vcl/vcom.c
@@ -0,0 +1,3334 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <time.h>
+#include <stdarg.h>
+#include <sys/resource.h>
+
+#include <vcl/vcom_socket_wrapper.h>
+#include <vcl/vcom.h>
+#include <sys/time.h>
+
+#include <vcl/vppcom.h>
+#include <vcl/vcom_socket.h>
+
+/* GCC have printf type attribute check. */
+#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
+#define PRINTF_ATTRIBUTE(a,b) \
+ __attribute__ ((__format__ (__printf__, a, b)))
+#else
+#define PRINTF_ATTRIBUTE(a,b)
+#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE
+#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
+#define CONSTRUCTOR_ATTRIBUTE \
+ __attribute__ ((constructor))
+#else
+#define CONSTRUCTOR_ATTRIBUTE
+#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
+
+#define HAVE_DESTRUCTOR_ATTRIBUTE
+#ifdef HAVE_DESTRUCTOR_ATTRIBUTE
+#define DESTRUCTOR_ATTRIBUTE \
+ __attribute__ ((destructor))
+#else
+#define DESTRUCTOR_ATTRIBUTE
+#endif
+
+#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE
+#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
+#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \
+ __attribute__((no_sanitize_address))
+#else
+#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
+#endif
+
+#define VCOM_SOCKET_FD_MAX 0x10000
+
+static char vcom_app_name[MAX_VCOM_APP_NAME];
+
+/*
+ * RETURN: 0 on success or -1 on error.
+ * */
+int
+vcom_set_app_name (char *__app_name)
+{
+ return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d",
+ __app_name, getpid ()) < 0 ? -1 : 0;
+}
+
+static char *
+vcom_get_app_name ()
+{
+ if (vcom_app_name[0] == '\0')
+ {
+ snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ());
+ }
+ return vcom_app_name;
+}
+
+/*
+ * 1 if init, 0 otherwise
+ */
+static int is_vcom_init;
+
+/*
+ * TBD: Make it thread safe
+ */
+
+/*
+ * constructor function called before main is called
+ * RETURN: 0 on success -1 on failure
+ * */
+static inline int
+vcom_init ()
+{
+ pid_t pid = getpid ();
+
+ if (!is_vcom_init)
+ {
+ if (vppcom_app_create (vcom_get_app_name ()) != 0)
+ {
+ printf ("\n[%d] vcom_init...failed!\n", pid);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] vcom_init: vppcom_app_create failed!\n", pid);
+ return -1;
+ }
+ if (vcom_socket_main_init () != 0)
+ {
+ printf ("\n[%d] vcom_init...failed!\n", pid);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] vcom_init: vcom_socket_main_init failed!\n", pid);
+ return -1;
+ }
+
+ is_vcom_init = 1;
+ printf ("\n[%d] vcom_init...done!\n", pid);
+ }
+ return 0;
+}
+
+static inline void
+vcom_destroy (void)
+{
+ pid_t pid = getpid ();
+
+ if (is_vcom_init)
+ {
+ vcom_socket_main_destroy ();
+ vppcom_app_destroy ();
+ is_vcom_init = 0;
+ fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid);
+ }
+}
+
+static inline int
+is_vcom_socket_fd (int fd)
+{
+ return vcom_socket_is_vcom_fd (fd);
+}
+
+static inline int
+is_vcom_epfd (int epfd)
+{
+ return vcom_socket_is_vcom_epfd (epfd);
+}
+
+
+/*
+ *
+ * Generic glibc fd api
+ *
+ */
+
+/* Close the file descriptor FD.
+
+ This function is a cancellation point and therefore
+ not marked with __THROW. */
+/*
+ * PRE: is_vcom_socket_fd(__fd) == 1
+ * RETURN: 0 on success and -1 for errors.
+ * */
+int
+vcom_close (int __fd)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ if (vcom_socket_close (__fd) != 0)
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * RETURN: 0 on success, or -1 on error
+ */
+int
+close (int __fd)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd))
+ {
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ rv = vcom_close (__fd);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr, "[%d] close: " "'%04d'='%04d'\n", pid, rv, __fd);
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_close (__fd);
+}
+
+/* Read NBYTES into BUF from FD. Return the
+ number read, -1 for errors or 0 for EOF.
+
+ This function is a cancellation point and therefore
+ not marked with __THROW. */
+ssize_t
+vcom_read (int __fd, void *__buf, size_t __nbytes)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_read (__fd, __buf, __nbytes);
+}
+
+ssize_t
+read (int __fd, void *__buf, size_t __nbytes)
+{
+ ssize_t size = 0;
+ pid_t pid = getpid ();
+ pthread_t tid = pthread_self ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] read:1 "
+ "'%04d'='%04d', '%p', '%04d'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ (int) size, __fd, __buf, (int) __nbytes);
+ size = vcom_read (__fd, __buf, __nbytes);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] read:2 "
+ "'%04d'='%04d', '%p', '%04d'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ (int) size, __fd, __buf, (int) __nbytes);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_read (__fd, __buf, __nbytes);
+}
+
+ssize_t
+vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_readv (__fd, __iov, __iovcnt);
+}
+
+ssize_t
+readv (int __fd, const struct iovec * __iov, int __iovcnt)
+{
+ ssize_t size = 0;
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_readv (__fd, __iov, __iovcnt);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ else
+ return libc_readv (__fd, __iov, __iovcnt);
+}
+
+/* Write N bytes of BUF to FD. Return the number written, or -1.
+
+ This function is a cancellation point and therefore
+ not marked with __THROW. */
+ssize_t
+vcom_write (int __fd, const void *__buf, size_t __n)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_write (__fd, (void *) __buf, __n);
+}
+
+ssize_t
+write (int __fd, const void *__buf, size_t __n)
+{
+ ssize_t size = 0;
+ pid_t pid = getpid ();
+ pthread_t tid = pthread_self ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] write:1 "
+ "'%04d'='%04d', '%p', '%04d'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ (int) size, __fd, __buf, (int) __n);
+ size = vcom_write (__fd, __buf, __n);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] write:2 "
+ "'%04d'='%04d', '%p', '%04d'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ (int) size, __fd, __buf, (int) __n);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_write (__fd, __buf, __n);
+}
+
+ssize_t
+vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_writev (__fd, __iov, __iovcnt);
+}
+
+ssize_t
+writev (int __fd, const struct iovec * __iov, int __iovcnt)
+{
+ ssize_t size = 0;
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_writev (__fd, __iov, __iovcnt);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ else
+ return libc_writev (__fd, __iov, __iovcnt);
+}
+
+/* Do the file control operation described by CMD on FD.
+ The remaining arguments are interpreted depending on CMD.
+
+ This function is a cancellation point and therefore
+ not marked with __THROW. */
+int
+vcom_fcntl_va (int __fd, int __cmd, va_list __ap)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_fcntl_va (__fd, __cmd, __ap);
+}
+
+int
+vcom_fcntl (int __fd, int __cmd, ...)
+{
+ int rv = -1;
+ va_list ap;
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ va_start (ap, __cmd);
+ rv = vcom_fcntl_va (__fd, __cmd, ap);
+ va_end (ap);
+ }
+ return rv;
+}
+
+int
+fcntl (int __fd, int __cmd, ...)
+{
+ int rv;
+ va_list ap;
+ pid_t pid = getpid ();
+
+ va_start (ap, __cmd);
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_fcntl_va (__fd, __cmd, ap);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] fcntl: "
+ "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd);
+ if (rv < 0)
+ {
+ errno = -rv;
+ rv = -1;
+ }
+ goto out;
+ }
+ rv = libc_vfcntl (__fd, __cmd, ap);
+
+out:
+ va_end (ap);
+ return rv;
+}
+
+int
+vcom_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_ioctl_va (__fd, __cmd, __ap);
+}
+
+int
+vcom_ioctl (int __fd, unsigned long int __cmd, ...)
+{
+ int rv = -1;
+ va_list ap;
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ va_start (ap, __cmd);
+ rv = vcom_ioctl_va (__fd, __cmd, ap);
+ va_end (ap);
+ }
+ return rv;
+}
+
+int
+ioctl (int __fd, unsigned long int __cmd, ...)
+{
+ int rv;
+ va_list ap;
+ pid_t pid = getpid ();
+
+ va_start (ap, __cmd);
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_ioctl_va (__fd, __cmd, ap);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] ioctl: "
+ "'%04d'='%04d', '%04ld'\n", pid, rv, __fd, __cmd);
+ if (rv < 0)
+ {
+ errno = -rv;
+ rv = -1;
+ }
+ goto out;
+ }
+ rv = libc_vioctl (__fd, __cmd, ap);
+
+out:
+ va_end (ap);
+ return rv;
+}
+
+/*
+ * Check the first NFDS descriptors each in READFDS (if not NULL) for
+ * read readiness, in WRITEFDS (if not NULL) for write readiness,
+ * and in EXCEPTFDS (if not NULL) for exceptional conditions.
+ * If TIMEOUT is not NULL, time out after waiting the interval
+ * specified therein. Returns the number of ready descriptors,
+ * or -1 for errors.
+ *
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+
+/*
+ * clear all vcom FDs from fd_sets __readfds, __writefds and
+ * __exceptfds and update the new nfds
+ *
+ * new nfds is the highest-numbered file descriptor
+ * in any of the three sets, plus 1
+ *
+ * Return the number of file descriptors contained in the
+ * three descriptor sets. ie. the total number of the bits
+ * that are set in __readfds, __writefds and __exceptfds
+ */
+static inline int
+vcom_fd_clear (int __nfds,
+ int *__new_nfds,
+ fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds)
+{
+ int fd;
+ /* invalid max_fd is -1 */
+ int max_fd = -1;
+ int nfd = 0;
+
+
+ /* clear all vcom fd from the sets */
+ for (fd = 0; fd < __nfds; fd++)
+ {
+
+ /* clear vcom fd from set */
+ /*
+ * F fd set
+ */
+#define _(F) \
+ if ((F) && FD_ISSET (fd, (F))) \
+ { \
+ if (is_vcom_socket_fd (fd)) \
+ { \
+ FD_CLR (fd, (F)); \
+ } \
+ }
+
+
+ _(__readfds);
+ _(__writefds);
+ _(__exceptfds);
+#undef _
+ }
+
+ /*
+ * compute nfd and __new_nfds
+ */
+ for (fd = 0; fd < __nfds; fd++)
+ {
+
+ /*
+ * F fd set
+ */
+#define _(F) \
+ if ((F) && FD_ISSET (fd, (F))) \
+ { \
+ if (fd > max_fd) \
+ { \
+ max_fd = fd; \
+ } \
+ ++nfd; \
+ }
+
+
+ _(__readfds);
+ _(__writefds);
+ _(__exceptfds);
+#undef _
+ }
+
+ *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
+ return nfd;
+}
+
+/*
+ * Return the number of file descriptors contained in the
+ * three descriptor sets. ie. the total number of the bits
+ * that are set in __readfds, __writefds and __exceptfds
+ */
+static inline int
+vcom_fd_set (int __nfds,
+ /* dest */
+ int *__new_nfds,
+ fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds, fd_set * __restrict __exceptfds,
+ /* src */
+ fd_set * __restrict __saved_readfds,
+ fd_set * __restrict __saved_writefds,
+ fd_set * __restrict __saved_exceptfds)
+{
+ int fd;
+ /* invalid max_fd is -1 */
+ int max_fd = -1;
+ int nfd = 0;
+
+ for (fd = 0; fd < __nfds; fd++)
+ {
+ /*
+ * F fd set
+ * S saved fd set
+ */
+#define _(S,F) \
+ if ((F) && (S) && FD_ISSET (fd, (S))) \
+ { \
+ if (is_vcom_socket_fd (fd)) \
+ { \
+ FD_SET (fd, (F)); \
+ } \
+ }
+
+
+ _(__saved_readfds, __readfds);
+ _(__saved_writefds, __writefds);
+#undef _
+ }
+
+
+ /*
+ * compute nfd and __new_nfds
+ */
+ for (fd = 0; fd < __nfds; fd++)
+ {
+
+ /*
+ * F fd set
+ */
+#define _(F) \
+ if ((F) && FD_ISSET (fd, (F))) \
+ { \
+ if (fd > max_fd) \
+ { \
+ max_fd = fd; \
+ } \
+ ++nfd; \
+ }
+
+
+ _(__readfds);
+ _(__writefds);
+ _(__exceptfds);
+#undef _
+ }
+
+ *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
+ return nfd;
+}
+
+/*
+ * split select sets(src) into
+ * vcom sets(dest1) and libc sets(dest2)
+ */
+static inline void
+vcom_fd_set_split (
+ /* src, select sets */
+ int nfds,
+ fd_set * __restrict readfds,
+ fd_set * __restrict writefds,
+ fd_set * __restrict exceptfds,
+ /* dest1, vcom sets */
+ int *vcom_nfds,
+ fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds, int *vcom_nfd,
+ /* dest2, libc sets */
+ int *libc_nfds,
+ fd_set * __restrict libc_readfds,
+ fd_set * __restrict libc_writefds,
+ fd_set * __restrict libc_exceptfds, int *libc_nfd)
+{
+ int fd;
+
+ /* vcom */
+ /* invalid max_fd is -1 */
+ int vcom_max_fd = -1;
+ int vcom_nfd2 = 0;
+
+ /* libc */
+ /* invalid max_fd is -1 */
+ int libc_max_fd = -1;
+ int libc_nfd2 = 0;
+
+
+ for (fd = 0; fd < nfds; fd++)
+ {
+ /*
+ * S select fd set
+ * V vcom fd set
+ * L libc fd set
+ */
+#define _(S,V,L) \
+ if ((S) && FD_ISSET (fd, (S))) \
+ { \
+ if (is_vcom_socket_fd (fd)) \
+ { \
+ if ((V)) \
+ { \
+ FD_SET(fd, (V)); \
+ if (fd > vcom_max_fd) \
+ { \
+ vcom_max_fd = fd; \
+ } \
+ ++vcom_nfd2; \
+ } \
+ } \
+ else \
+ { \
+ if ((L)) \
+ { \
+ FD_SET(fd, (L)); \
+ if (fd > libc_max_fd) \
+ { \
+ libc_max_fd = fd; \
+ } \
+ ++libc_nfd2; \
+ } \
+ } \
+ }
+
+
+ _(readfds, vcom_readfds, libc_readfds);
+ _(writefds, vcom_writefds, libc_writefds);
+ _(exceptfds, vcom_exceptfds, libc_exceptfds);
+#undef _
+ }
+
+ if (vcom_nfds)
+ *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0;
+ if (vcom_nfd)
+ *vcom_nfd = vcom_nfd2;
+ if (libc_nfds)
+ *libc_nfds = libc_max_fd != -1 ? libc_max_fd + 1 : 0;
+ if (libc_nfd)
+ *libc_nfd = libc_nfd2;
+}
+
+/*
+ * merge vcom sets(src1) and libc sets(src2)
+ * into select sets(dest)
+ */
+static inline void
+vcom_fd_set_merge (
+ /* dest, select sets */
+ int *nfds,
+ fd_set * __restrict readfds,
+ fd_set * __restrict writefds,
+ fd_set * __restrict exceptfds, int *nfd,
+ /* src1, vcom sets */
+ int vcom_nfds,
+ fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds, int vcom_nfd,
+ /* src2, libc sets */
+ int libc_nfds,
+ fd_set * __restrict libc_readfds,
+ fd_set * __restrict libc_writefds,
+ fd_set * __restrict libc_exceptfds, int libc_nfd)
+{
+ int fd;
+ /* invalid max_fd is -1 */
+ int max_fd = -1;
+ int nfd2 = 0;
+
+
+ /* FD_BIT_OR
+ *
+ * dest |= src at current bit index
+ * update MAX and NFD of dest fd set
+ *
+ *
+ * FS source fd set
+ * FD dest fd set
+ * BI bit index
+ * MAX current max_fd of dest fd sets
+ * NFD current nfd of dest fd sets
+ * N nfds of source fd set
+ */
+#define FD_BIT_OR(FD,FS,BI, \
+ MAX,NFD) \
+ if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \
+ { \
+ FD_SET ((BI), (FD)); \
+ if ((BI) > (MAX)) \
+ { \
+ (MAX) = (BI); \
+ } \
+ ++(NFD); \
+ }
+
+
+ /* FD_RWE_SET_OR */
+ /*
+ * SR,SW,SE source RWE fd sets
+ * DR,DW,DE dest RWE fd sets
+ * BI bit index
+ * NFDS nfds of source fd sets
+ * MAX current max_fd of dest fd sets
+ * NFD current nfd of dest fd sets
+ */
+#define FD_RWE_SETS_OR(DR,DW,DE, \
+ SR,SW,SE, \
+ BI,NFDS, \
+ MAX,NFD) \
+ do \
+ { \
+ for ((BI) = 0; (BI) < (NFDS); (BI)++) \
+ { \
+ FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \
+ FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \
+ FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \
+ } \
+ } \
+ while (0);
+
+
+ /* source(vcom) to dest(select) rwe fd sets */
+ FD_RWE_SETS_OR (readfds, writefds, exceptfds,
+ vcom_readfds, vcom_writefds, vcom_exceptfds,
+ fd, vcom_nfds, max_fd, nfd2);
+
+ /* source(libc) to dest(select) rwe fd sets */
+ FD_RWE_SETS_OR (readfds, writefds, exceptfds,
+ libc_readfds, libc_writefds, libc_exceptfds,
+ fd, libc_nfds, max_fd, nfd2);
+
+#undef FD_RWE_SETS_OR
+#undef FD_BIT_OR
+
+ if (nfds)
+ *nfds = max_fd != -1 ? max_fd + 1 : 0;
+ if (nfd)
+ *nfd = nfd2;
+}
+
+/*
+ * RETURN 1 if fds is NULL or empty. 0 otherwise
+ */
+static inline int
+fd_set_iszero (fd_set * __restrict fds)
+{
+ int fd;
+
+ /* NULL fds */
+ if (!fds)
+ return 1;
+
+ for (fd = 0; fd < FD_SETSIZE; fd++)
+ {
+ if (FD_ISSET (fd, fds))
+ {
+ /* non-empty fds */
+ return 0;
+ }
+ }
+ /* empty fds */
+ return 1;
+}
+
+
+/*
+ * ################
+ * kernel time64.h
+ * ################
+ * */
+typedef long int s64;
+typedef unsigned long int u64;
+
+typedef long long int __s64;
+typedef unsigned long long int __u64;
+
+typedef __s64 time64_t;
+typedef __u64 timeu64_t;
+
+/* Parameters used to convert the timespec values: */
+#define MSEC_PER_SEC 1000L
+#define USEC_PER_MSEC 1000L
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+#define FSEC_PER_SEC 1000000000000000LL
+
+
+/*
+ * ################
+ * kernel time.h
+ * ################
+ * */
+
+
+#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
+
+#ifdef VCOM_USE_TIMESPEC_EQUAL
+static inline int
+timespec_equal (const struct timespec *a, const struct timespec *b)
+{
+ return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
+}
+#endif
+
+/*
+ * lhs < rhs: return <0
+ * lhs == rhs: return 0
+ * lhs > rhs: return >0
+ */
+static inline int
+timespec_compare (const struct timespec *lhs, const struct timespec *rhs)
+{
+ if (lhs->tv_sec < rhs->tv_sec)
+ return -1;
+ if (lhs->tv_sec > rhs->tv_sec)
+ return 1;
+ return lhs->tv_nsec - rhs->tv_nsec;
+}
+
+#ifdef VCOM_USE_TIMEVAL_COMPARE
+static inline int
+timeval_compare (const struct timeval *lhs, const struct timeval *rhs)
+{
+ if (lhs->tv_sec < rhs->tv_sec)
+ return -1;
+ if (lhs->tv_sec > rhs->tv_sec)
+ return 1;
+ return lhs->tv_usec - rhs->tv_usec;
+}
+#endif
+
+extern void set_normalized_timespec (struct timespec *ts, time_t sec,
+ s64 nsec);
+
+static inline struct timespec
+timespec_add (struct timespec lhs, struct timespec rhs)
+{
+ struct timespec ts_delta;
+ set_normalized_timespec (&ts_delta, lhs.tv_sec + rhs.tv_sec,
+ lhs.tv_nsec + rhs.tv_nsec);
+ return ts_delta;
+}
+
+/*
+ * sub = lhs - rhs, in normalized form
+ */
+static inline struct timespec
+timespec_sub (struct timespec lhs, struct timespec rhs)
+{
+ struct timespec ts_delta;
+ set_normalized_timespec (&ts_delta, lhs.tv_sec - rhs.tv_sec,
+ lhs.tv_nsec - rhs.tv_nsec);
+ return ts_delta;
+}
+
+/*
+ * ################
+ * kernel time.c
+ * ################
+ * */
+
+
+/**
+ * set_normalized_timespec - set timespec sec and nsec parts and normalize
+ *
+ * @ts: pointer to timespec variable to be set
+ * @sec: seconds to set
+ * @nsec: nanoseconds to set
+ *
+ * Set seconds and nanoseconds field of a timespec variable and
+ * normalize to the timespec storage format
+ *
+ * Note: The tv_nsec part is always in the range of
+ * 0 <= tv_nsec < NSEC_PER_SEC
+ * For negative values only the tv_sec field is negative !
+ */
+void
+set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec)
+{
+ while (nsec >= NSEC_PER_SEC)
+ {
+ /*
+ * The following asm() prevents the compiler from
+ * optimising this loop into a modulo operation. See
+ * also __iter_div_u64_rem() in include/linux/time.h
+ */
+ asm ("":"+rm" (nsec));
+ nsec -= NSEC_PER_SEC;
+ ++sec;
+ }
+ while (nsec < 0)
+ {
+ asm ("":"+rm" (nsec));
+ nsec += NSEC_PER_SEC;
+ --sec;
+ }
+ ts->tv_sec = sec;
+ ts->tv_nsec = nsec;
+}
+
+#define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0))
+
+/* Macros for converting between `struct timeval' and `struct timespec'. */
+#define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) { \
+ (ts)->tv_sec = (tv)->tv_sec; \
+ (ts)->tv_nsec = (tv)->tv_usec * 1000; \
+}
+#define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) { \
+ (tv)->tv_sec = (ts)->tv_sec; \
+ (tv)->tv_usec = (ts)->tv_nsec / 1000; \
+}
+
+static inline int
+vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds,
+ struct timeval *__restrict timeout)
+{
+ return vcom_socket_select (vcom_nfds, vcom_readfds,
+ vcom_writefds, vcom_exceptfds, timeout);
+}
+
+int
+vcom_select (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval *__restrict __timeout)
+{
+ int rv;
+ int rv2 = 0;
+ pid_t pid = getpid ();
+
+ int timedout = 0;
+ /* block indefinitely */
+ int no_timeout = 0;
+ int first_clock_gettime_failed = 0;
+ /* timeout value in units of timespec */
+ struct timespec timeout_ts;
+ struct timespec start_time, now, end_time;
+
+ /* select sets attributes - after merge */
+ int new_nfds = 0;
+ int new_nfd = -1;
+
+ /* vcom */
+ int vcom_nfds = 0;
+ fd_set vcom_readfds;
+ fd_set vcom_writefds;
+ fd_set vcom_exceptfds;
+ int vcom_nfd = -1;
+
+ /* libc */
+ int libc_nfds = 0;
+ fd_set libc_readfds;
+ fd_set libc_writefds;
+ fd_set libc_exceptfds;
+ int libc_nfd = -1;
+
+ /* for polling */
+ struct timeval tv = {.tv_sec = 0,.tv_usec = 0 };
+
+ /* validate __timeout */
+ if (__timeout)
+ {
+ /* validate tv_sec */
+ /* bogus */
+ if (!vcom_timerisvalid (__timeout))
+ {
+ rv = -EINVAL;
+ goto select_done;
+ }
+
+ /* validate tv_usec */
+ /* TBD: */
+ /* init timeout_ts */
+ VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts);
+ set_normalized_timespec (&timeout_ts,
+ timeout_ts.tv_sec, timeout_ts.tv_nsec);
+ }
+
+ rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
+ if (rv == -1)
+ {
+ rv = -errno;
+ first_clock_gettime_failed = 1;
+ goto select_done;
+ }
+
+ /* init end_time */
+ if (__timeout)
+ {
+ if (timerisset (__timeout))
+ {
+ end_time = timespec_add (start_time, timeout_ts);
+ }
+ else
+ {
+ /*
+ * if both fields of the timeout structure are zero,
+ * then select returns immediately
+ * */
+ end_time = start_time;
+ }
+ }
+ else
+ {
+ /* block indefinitely */
+ no_timeout = 1;
+ }
+
+
+
+ if (vcom_init () != 0)
+ {
+ rv = -1;
+ goto select_done;
+ }
+
+ /* validate __nfds */
+ if (__nfds < 0 || __nfds > FD_SETSIZE)
+ {
+ rv = -EINVAL;
+ goto select_done;
+ }
+
+
+ /*
+ * usleep(3) emulation
+ * */
+
+ /* call libc_select() with a finite timeout and
+ * no file descriptors or empty fd sets and
+ * zero nfds */
+ if (__nfds == 0 &&
+ (!__readfds || fd_set_iszero (__readfds)) &&
+ (!__writefds || fd_set_iszero (__writefds)) &&
+ (!__exceptfds || fd_set_iszero (__exceptfds)))
+ {
+ if (__timeout)
+ {
+ rv = libc_select (__nfds,
+ __readfds, __writefds, __exceptfds, __timeout);
+ if (rv == -1)
+ rv = -errno;
+ }
+ else
+ {
+ /* TBD: block indefinitely or return -EINVAL */
+ rv = -EINVAL;
+ }
+ goto select_done;
+ }
+
+ /* init once before the polling loop */
+
+ /* zero vcom and libc fd sets */
+ /*
+ * S select fd set
+ * V vcom fd set
+ * L libc fd set
+ */
+#define _(S,V,L) \
+ if ((S)) \
+ { \
+ FD_ZERO ((V)); \
+ FD_ZERO ((L)); \
+ }
+
+
+ _(__readfds, &vcom_readfds, &libc_readfds);
+ _(__writefds, &vcom_writefds, &libc_writefds);
+ _(__exceptfds, &vcom_exceptfds, &libc_exceptfds);
+#undef _
+ new_nfds = 0;
+ new_nfd = -1;
+
+ vcom_nfds = 0;
+ vcom_nfd = -1;
+ libc_nfds = 0;
+ libc_nfd = -1;
+
+ vcom_fd_set_split (
+ /* src, select sets */
+ __nfds, __readfds, __writefds, __exceptfds,
+ /* dest1, vcom sets */
+ __readfds || __writefds || __exceptfds ?
+ &vcom_nfds : NULL,
+ __readfds ? &vcom_readfds : NULL,
+ __writefds ? &vcom_writefds : NULL,
+ __exceptfds ? &vcom_exceptfds : NULL,
+ __readfds || __writefds || __exceptfds ?
+ &vcom_nfd : NULL,
+ /* dest2, libc sets */
+ __readfds || __writefds || __exceptfds ?
+ &libc_nfds : NULL,
+ __readfds ? &libc_readfds : NULL,
+ __writefds ? &libc_writefds : NULL,
+ __exceptfds ? &libc_exceptfds : NULL,
+ __readfds || __writefds || __exceptfds ?
+ &libc_nfd : NULL);
+
+ /*
+ * polling loop
+ * */
+ do
+ {
+ new_nfd = -1;
+ vcom_nfd = -1;
+ libc_nfd = -1;
+
+ /*
+ * if both fields of timeval structure are zero,
+ * vcom_select_impl and libc_select returns immediately.
+ * useful for polling and ensure fairness among
+ * file descriptors watched.
+ */
+
+ /* for polling */
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+
+ /* select on vcom fds */
+ if (vcom_nfds)
+ {
+ vcom_nfd = vcom_select_impl (vcom_nfds,
+ __readfds ? &vcom_readfds : NULL,
+ __writefds ? &vcom_writefds : NULL,
+ __exceptfds ? &vcom_exceptfds : NULL,
+ &tv);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d] select vcom: "
+ "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds);
+
+ if (vcom_nfd < 0)
+ {
+ rv = vcom_nfd;
+ goto select_done;
+ }
+ }
+ /* select on libc fds */
+ if (libc_nfds)
+ {
+ libc_nfd = libc_select (libc_nfds,
+ __readfds ? &libc_readfds : NULL,
+ __writefds ? &libc_writefds : NULL,
+ __exceptfds ? &libc_exceptfds : NULL, &tv);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d] select libc: "
+ "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds);
+
+ if (libc_nfd < 0)
+ {
+ /* tv becomes undefined */
+ libc_nfd = errno;
+ rv = libc_nfd;
+ goto select_done;
+ }
+ }
+
+ /* check if any file descriptors changed status */
+ if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0))
+ {
+ /* zero the sets before merge and exit */
+
+ /*
+ * F fd set
+ */
+#define _(F) \
+ if ((F)) \
+ { \
+ FD_ZERO ((F)); \
+ }
+
+
+ _(__readfds);
+ _(__writefds);
+ _(__exceptfds);
+#undef _
+ new_nfds = 0;
+ new_nfd = -1;
+
+ /*
+ * on exit, sets are modified in place to indicate which
+ * file descriptors actually changed status
+ * */
+ vcom_fd_set_merge (
+ /* dest, select sets */
+ __readfds || __writefds || __exceptfds ?
+ &new_nfds : NULL,
+ __readfds,
+ __writefds,
+ __exceptfds,
+ __readfds || __writefds || __exceptfds ?
+ &new_nfd : NULL,
+ /* src1, vcom sets */
+ vcom_nfds,
+ __readfds ? &vcom_readfds : NULL,
+ __writefds ? &vcom_writefds : NULL,
+ __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd,
+ /* src2, libc sets */
+ libc_nfds,
+ __readfds ? &libc_readfds : NULL,
+ __writefds ? &libc_writefds : NULL,
+ __exceptfds ? &libc_exceptfds : NULL, libc_nfd);
+ /*
+ * return the number of file descriptors contained in the
+ * three returned sets
+ * */
+ rv = 0;
+ /*
+ * for documentation
+ *
+ * if(vcom_nfd > 0)
+ * rv += vcom_nfd;
+ * if(libc_nfd > 0)
+ * rv += libc_nfd;
+ */
+
+ rv = new_nfd == -1 ? 0 : new_nfd;
+ goto select_done;
+ }
+
+ rv = clock_gettime (CLOCK_MONOTONIC, &now);
+ if (rv == -1)
+ {
+ rv = -errno;
+ goto select_done;
+ }
+ }
+ while (no_timeout || timespec_compare (&now, &end_time) < 0);
+
+ /* timeout expired before anything interesting happened */
+ timedout = 1;
+ rv = 0;
+
+select_done:
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds);
+ /*
+ * modify timeout parameter to reflect the amount of time not slept
+ * */
+ if (__timeout)
+ {
+ if (vcom_timerisvalid (__timeout))
+ {
+ /* timeout expired */
+ if (timedout)
+ {
+ timerclear (__timeout);
+ }
+ else if (!first_clock_gettime_failed)
+ {
+ rv2 = clock_gettime (CLOCK_MONOTONIC, &now);
+ if (rv2 == -1)
+ {
+ rv = -errno;
+ }
+ else
+ {
+ struct timespec ts_delta;
+ ts_delta = timespec_sub (end_time, now);
+ VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta);
+ }
+ }
+ }
+ }
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds);
+
+ return rv;
+}
+
+int
+vcom_select_internal (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval *__restrict __timeout)
+{
+ int rv;
+ int new_nfds = 0;
+ int nfd = 0;
+ pid_t pid = getpid ();
+
+ fd_set saved_readfds;
+ fd_set saved_writefds;
+ fd_set saved_exceptfds;
+
+ /* validate __nfds */
+ if (__nfds < 0)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* validate __timeout */
+ if (__timeout)
+ {
+ /* validate tv_sec */
+ /* bogus */
+ if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* validate tv_usec */
+ /* TBD: */
+ }
+
+ /* init saved_x fds */
+ if (__readfds)
+ {
+ saved_readfds = *__readfds;
+ /*
+ memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
+ */
+ }
+ else
+ {
+ FD_ZERO (&saved_readfds);
+ }
+
+ if (__writefds)
+ {
+ saved_writefds = *__writefds;
+ /*
+ memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
+ */
+
+ }
+ else
+ {
+ FD_ZERO (&saved_writefds);
+ }
+
+ if (__exceptfds)
+ {
+ saved_exceptfds = *__exceptfds;
+ /*
+ memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
+ */
+
+ }
+ else
+ {
+ FD_ZERO (&saved_exceptfds);
+ }
+
+ /* clear vcom fds */
+ nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
+
+ /* set to an invalid value */
+ rv = -2;
+ /* have kernel fds */
+ if (new_nfds)
+ rv = libc_select (new_nfds, __readfds,
+ __writefds, __exceptfds, __timeout);
+
+ if (new_nfds && rv == -1)
+ {
+ /* on error, the file descriptor sets are unmodified */
+ if (__readfds)
+ *__readfds = saved_readfds;
+ if (__writefds)
+ *__writefds = saved_writefds;
+ if (__exceptfds)
+ *__exceptfds = saved_exceptfds;
+ return rv;
+ }
+ else if ((new_nfds && rv != -1) || (rv == -2))
+ {
+ /* restore vcom fds */
+ nfd = vcom_fd_set (__nfds,
+ &new_nfds,
+ __readfds,
+ __writefds,
+ __exceptfds,
+ &saved_readfds, &saved_writefds, &saved_exceptfds);
+ rv = nfd;
+ }
+
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds);
+ return rv;
+}
+
+int
+select (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout)
+{
+ int rv = 0;
+ pid_t pid = getpid ();
+
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds);
+ rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+}
+
+#ifdef __USE_XOPEN2K
+/*
+ * Same as above only that the TIMEOUT value is given with higher
+ * resolution and a sigmask which is been set temporarily. This
+ * version should be used.
+ *
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+int
+vcom_pselect (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec *__restrict __timeout,
+ const __sigset_t * __restrict __sigmask)
+{
+ int fd;
+ int vcom_nfds = 0;
+
+ for (fd = 0; fd < __nfds; fd++)
+ {
+ if (__readfds && FD_ISSET (fd, __readfds))
+ {
+ if (is_vcom_socket_fd (fd))
+ {
+ vcom_nfds++;
+ }
+ }
+
+ if (__writefds && FD_ISSET (fd, __writefds))
+ {
+ if (is_vcom_socket_fd (fd))
+ {
+ vcom_nfds++;
+ }
+ }
+ if (__exceptfds && FD_ISSET (fd, __exceptfds))
+ {
+ if (is_vcom_socket_fd (fd))
+ {
+ FD_CLR (fd, __exceptfds);
+ }
+ }
+ }
+ return vcom_nfds;
+}
+
+int
+pselect (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec *__restrict __timeout,
+ const __sigset_t * __restrict __sigmask)
+{
+ int rv;
+ int new_nfds = 0;
+ int nfd = 0;
+ pid_t pid = getpid ();
+
+ fd_set saved_readfds;
+ fd_set saved_writefds;
+ fd_set saved_exceptfds;
+
+ /* validate __nfds */
+ if (__nfds < 0)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* validate __timeout */
+ if (__timeout)
+ {
+ /* validate tv_sec */
+ /* bogus */
+ if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* validate tv_usec */
+ /* TBD: */
+ }
+
+ /* init saved fds */
+ if (__readfds)
+ {
+ saved_readfds = *__readfds;
+ /*
+ memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
+ */
+ }
+ else
+ {
+ FD_ZERO (&saved_readfds);
+ }
+
+ if (__writefds)
+ {
+ saved_writefds = *__writefds;
+ /*
+ memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
+ */
+
+ }
+ else
+ {
+ FD_ZERO (&saved_writefds);
+ }
+
+ if (__exceptfds)
+ {
+ saved_exceptfds = *__exceptfds;
+ /*
+ memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
+ */
+
+ }
+ else
+ {
+ FD_ZERO (&saved_exceptfds);
+ }
+
+ /* clear vcom fds */
+ nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
+
+ /* set to an invalid value */
+ rv = -2;
+ if (new_nfds)
+ rv = libc_pselect (new_nfds,
+ __readfds,
+ __writefds, __exceptfds, __timeout, __sigmask);
+
+ if (new_nfds && rv == -1)
+ {
+ /* on error, the file descriptor sets are unmodified */
+ if (__readfds)
+ *__readfds = saved_readfds;
+ if (__writefds)
+ *__writefds = saved_writefds;
+ if (__exceptfds)
+ *__exceptfds = saved_exceptfds;
+ return rv;
+ }
+ else if ((new_nfds && rv != -1) || (rv == -2))
+ {
+ /* restore vcom fds */
+ nfd = vcom_fd_set (__nfds,
+ &new_nfds,
+ __readfds,
+ __writefds,
+ __exceptfds,
+ &saved_readfds, &saved_writefds, &saved_exceptfds);
+ rv = nfd;
+ }
+
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds);
+ return rv;
+}
+#endif
+
+/*
+ *
+ * Socket specific glibc api
+ *
+ */
+
+/* Create a new socket of type TYPE in domain DOMAIN, using
+ * protocol PROTOCOL. If PROTOCOL is zero, one is chosen
+ * automatically. Returns a file descriptor for the new socket,
+ * or -1 for errors.
+ * RETURN: a valid file descriptor for the new socket,
+ * or -1 for errors.
+ * */
+
+int
+vcom_socket (int __domain, int __type, int __protocol)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_socket (__domain, __type, __protocol);
+}
+
+int
+socket (int __domain, int __type, int __protocol)
+{
+ int rv;
+ pid_t pid = getpid ();
+ pthread_t tid = pthread_self ();
+
+ /* handle domains implemented by vpp */
+ switch (__domain)
+ {
+ case AF_INET:
+ case AF_INET6:
+ /* handle types implemented by vpp */
+ switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ {
+ case SOCK_STREAM:
+ case SOCK_DGRAM:
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ rv = vcom_socket (__domain, __type, __protocol);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] socket: "
+ "'%04d'= D='%04d', T='%04d', P='%04d'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ rv, __domain, __type, __protocol);
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+ break;
+
+ default:
+ goto CALL_GLIBC_SOCKET_API;
+ break;
+ }
+
+ break;
+
+ default:
+ goto CALL_GLIBC_SOCKET_API;
+ break;
+ }
+
+CALL_GLIBC_SOCKET_API:
+ return libc_socket (__domain, __type, __protocol);
+}
+
+/*
+ * Create two new sockets, of type TYPE in domain DOMAIN and using
+ * protocol PROTOCOL, which are connected to each other, and put file
+ * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
+ * one will be chosen automatically.
+ * Returns 0 on success, -1 for errors.
+ * */
+int
+vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2])
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_socketpair (__domain, __type, __protocol, __fds);
+}
+
+int
+socketpair (int __domain, int __type, int __protocol, int __fds[2])
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ /* handle domains implemented by vpp */
+ switch (__domain)
+ {
+ case AF_INET:
+ case AF_INET6:
+ /* handle types implemented by vpp */
+ switch (__type)
+ {
+ case SOCK_STREAM:
+ case SOCK_DGRAM:
+ rv = vcom_socketpair (__domain, __type, __protocol, __fds);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] socketpair: "
+ "'%04d'= D='%04d', T='%04d', P='%04d'\n",
+ pid, rv, __domain, __type, __protocol);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ break;
+
+ default:
+ goto CALL_GLIBC_SOCKET_API;
+ break;
+ }
+
+ break;
+
+ default:
+ goto CALL_GLIBC_SOCKET_API;
+ break;
+ }
+
+CALL_GLIBC_SOCKET_API:
+ return libc_socketpair (__domain, __type, __protocol, __fds);
+}
+
+/*
+ * Give the socket FD the local address ADDR
+ * (which is LEN bytes long).
+ * */
+int
+vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+{
+ int rv;
+
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ /* validate __len */
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ if (__len != sizeof (struct sockaddr_in))
+ return -EINVAL;
+ break;
+ case AF_INET6:
+ if (__len != sizeof (struct sockaddr_in6))
+ return -EINVAL;
+ break;
+
+ default:
+ return -1;
+ break;
+ }
+
+ /* handle domains implemented by vpp */
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ case AF_INET6:
+ rv = vcom_socket_bind (__fd, __addr, __len);
+ return rv;
+ break;
+
+ default:
+ return -1;
+ break;
+ }
+
+ return -1;
+}
+
+int
+bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+
+ rv = vcom_bind (__fd, __addr, __len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] bind: "
+ "'%04d'='%04d', '%p', '%04d'\n",
+ pid, rv, __fd, __addr, __len);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_bind (__fd, __addr, __len);
+}
+
+/*
+ * Put the local address of FD into *ADDR and its length in *LEN.
+ * */
+int
+vcom_getsockname (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_getsockname (__fd, __addr, __len);
+}
+
+int
+getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_getsockname (__fd, __addr, __len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] getsockname: "
+ "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_getsockname (__fd, __addr, __len);
+}
+
+/*
+ * Open a connection on socket FD to peer at ADDR
+ * (which LEN bytes long). For connectionless socket types, just set
+ * the default address to send to and the only address from which to
+ * accept transmissions. Return 0 on success, -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+int
+vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+{
+ int rv = -1;
+
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ /* validate __len */
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ if (__len != INET_ADDRSTRLEN)
+ return -1;
+ break;
+ case AF_INET6:
+ if (__len != INET6_ADDRSTRLEN)
+ return -1;
+ break;
+
+ default:
+ return -1;
+ break;
+ }
+
+ /* handle domains implemented by vpp */
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ case AF_INET6:
+ rv = vcom_socket_connect (__fd, __addr, __len);
+ break;
+
+ default:
+ return -1;
+ break;
+ }
+
+ return rv;
+}
+
+int
+connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+{
+ int rv;
+ pid_t pid = getpid ();
+ pthread_t tid = pthread_self ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_connect (__fd, __addr, __len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] connect: "
+ "'%04d'='%04d', '%p', '%04d'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ rv, __fd, __addr, __len);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+
+ return libc_connect (__fd, __addr, __len);
+}
+
+/*
+ * Put the address of the peer connected to socket FD into *ADDR
+ * (which is *LEN bytes long), and its actual length into *LEN.
+ * */
+int
+vcom_getpeername (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_getpeername (__fd, __addr, __len);
+}
+
+int
+getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_getpeername (__fd, __addr, __len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] getpeername: "
+ "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_getpeername (__fd, __addr, __len);
+}
+
+/*
+ * Send N bytes of BUF to socket FD. Returns the number sent or -1.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+ssize_t
+vcom_send (int __fd, const void *__buf, size_t __n, int __flags)
+{
+
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags);
+}
+
+ssize_t
+send (int __fd, const void *__buf, size_t __n, int __flags)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_send (__fd, __buf, __n, __flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] send: "
+ "'%04d'='%04d', '%p', '%04d', '%04x'\n",
+ pid, (int) size, __fd, __buf, (int) __n, __flags);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_send (__fd, __buf, __n, __flags);
+}
+
+/*
+ * Read N bytes into BUF from socket FD.
+ * Returns the number read or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+ssize_t
+vcom_recv (int __fd, void *__buf, size_t __n, int __flags)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_recv (__fd, __buf, __n, __flags);
+}
+
+ssize_t
+recv (int __fd, void *__buf, size_t __n, int __flags)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_recv (__fd, __buf, __n, __flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] recv: "
+ "'%04d'='%04d', '%p', '%04d', '%04x'\n",
+ pid, (int) size, __fd, __buf, (int) __n, __flags);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_recv (__fd, __buf, __n, __flags);
+}
+
+/*
+ * Send N bytes of BUF on socket FD to peer at address ADDR (which is
+ * ADDR_LEN bytes long). Returns the number sent, or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+ssize_t
+vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags,
+ __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
+}
+
+ssize_t
+sendto (int __fd, const void *__buf, size_t __n, int __flags,
+ __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] sendto: "
+ "'%04d'='%04d', '%p', '%04d', '%04x', "
+ "'%p', '%04d'\n",
+ pid, (int) size, __fd, __buf, (int) __n, __flags,
+ __addr, __addr_len);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
+}
+
+/*
+ * Read N bytes into BUF through socket FD.
+ * If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the
+ * address of the sender, and store the actual size of the address
+ * in *ADDR_LEN.
+ * Returns the number of bytes read or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+ssize_t
+vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n,
+ int __flags,
+ __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
+}
+
+ssize_t
+recvfrom (int __fd, void *__restrict __buf, size_t __n,
+ int __flags,
+ __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] recvfrom: "
+ "'%04d'='%04d', '%p', '%04d', '%04x', "
+ "'%p', '%p'\n",
+ pid, (int) size, __fd, __buf, (int) __n, __flags,
+ __addr, __addr_len);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
+}
+
+/*
+ * Send a message described MESSAGE on socket FD.
+ * Returns the number of bytes sent, or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+ssize_t
+vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_sendmsg (__fd, __message, __flags);
+}
+
+ssize_t
+sendmsg (int __fd, const struct msghdr * __message, int __flags)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_sendmsg (__fd, __message, __flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] sendmsg: "
+ "'%04d'='%04d', '%p', '%04x'\n",
+ pid, (int) size, __fd, __message, __flags);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_sendmsg (__fd, __message, __flags);
+}
+
+#ifdef __USE_GNU
+/*
+ * Send a VLEN messages as described by VMESSAGES to socket FD.
+ * Returns the number of datagrams successfully written
+ * or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+int
+vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_sendmmsg (__fd, __message, __vlen, __flags);
+}
+
+int
+sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_sendmmsg (__fd, __message, __vlen, __flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] sendmmsg: "
+ "'%04d'='%04d', '%p', '%04d', '%04x'\n",
+ pid, (int) size, __fd, __vmessages, __vlen, __flags);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_sendmmsg (__fd, __message, __vlen, __flags);
+}
+
+#endif
+
+/*
+ * Receive a message as described by MESSAGE from socket FD.
+ * Returns the number of bytes read or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+ssize_t
+vcom_recvmsg (int __fd, struct msghdr * __message, int __flags)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_recvmsg (__fd, __message, __flags);
+}
+
+ssize_t
+recvmsg (int __fd, struct msghdr * __message, int __flags)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_recvmsg (__fd, __message, __flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] recvmsg: "
+ "'%04d'='%04d', '%p', '%04x'\n",
+ pid, (int) size, __fd, __message, __flags);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_recvmsg (__fd, __message, __flags);
+}
+
+#ifdef __USE_GNU
+/*
+ * Receive up to VLEN messages as described by VMESSAGES from socket FD.
+ * Returns the number of messages received or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+int
+vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags, struct timespec *__tmo)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
+}
+
+int
+recvmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags, struct timespec *__tmo)
+{
+ ssize_t size;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ size = vcom_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] recvmmsg: "
+ "'%04d'='%04d', '%p', "
+ "'%04d', '%04x', '%p'\n",
+ pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo);
+ if (size < 0)
+ {
+ errno = -size;
+ return -1;
+ }
+ return size;
+ }
+ return libc_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
+}
+
+#endif
+
+/*
+ * Put the current value for socket FD's option OPTNAME
+ * at protocol level LEVEL into OPTVAL (which is *OPTLEN bytes long),
+ * and set *OPTLEN to the value's actual length.
+ * Returns 0 on success, -1 for errors.
+ * */
+int
+vcom_getsockopt (int __fd, int __level, int __optname,
+ void *__restrict __optval, socklen_t * __restrict __optlen)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_getsockopt (__fd, __level, __optname,
+ __optval, __optlen);
+}
+
+int
+getsockopt (int __fd, int __level, int __optname,
+ void *__restrict __optval, socklen_t * __restrict __optlen)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d] getsockopt: "
+ "'%04d'='%04d', '%04d', '%04d', "
+ "'%p', '%p'\n",
+ pid, rv, __fd, __level, __optname, __optval, __optlen);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
+}
+
+/*
+ * Set socket FD's option OPTNAME at protocol level LEVEL
+ * to *OPTVAL (which is OPTLEN bytes long).
+ * Returns 0 on success, -1 for errors.
+ * */
+int
+vcom_setsockopt (int __fd, int __level, int __optname,
+ const void *__optval, socklen_t __optlen)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_setsockopt (__fd, __level, __optname,
+ __optval, __optlen);
+}
+
+int
+setsockopt (int __fd, int __level, int __optname,
+ const void *__optval, socklen_t __optlen)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] setsockopt: "
+ "'%04d'='%04d', '%04d', '%04d', "
+ "'%p', '%04d'\n",
+ pid, rv, __fd, __level, __optname, __optval, __optlen);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
+}
+
+/*
+ * Prepare to accept connections on socket FD.
+ * N connection requests will be queued before further
+ * requests are refused.
+ * Returns 0 on success, -1 for errors.
+ * */
+int
+vcom_listen (int __fd, int __n)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_listen (__fd, __n);
+}
+
+int
+listen (int __fd, int __n)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_listen (__fd, __n);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] listen: "
+ "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_listen (__fd, __n);
+}
+
+/*
+ * Await a connection on socket FD.
+ * When a connection arrives, open a new socket to communicate
+ * with it, set *ADDR (which is *ADDR_LEN bytes long) to the address
+ * of the connecting peer and *ADDR_LEN to the address's actual
+ * length, and return the new socket's descriptor, or -1 for errors.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+int
+vcom_accept (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len)
+{
+
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+ return vcom_socket_accept (__fd, __addr, __addr_len);
+}
+
+int
+accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
+{
+ int rv = -1;
+ pid_t pid = getpid ();
+ pthread_t tid = pthread_self ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] accept1: "
+ "'%04d'='%04d', '%p', '%p'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ rv, __fd, __addr, __addr_len);
+ rv = vcom_accept (__fd, __addr, __addr_len);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d][%lu (0x%lx)] accept2: "
+ "'%04d'='%04d', '%p', '%p'\n",
+ pid, (unsigned long) tid, (unsigned long) tid,
+ rv, __fd, __addr, __addr_len);
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+ }
+ return libc_accept (__fd, __addr, __addr_len);
+}
+
+#ifdef __USE_GNU
+/*
+ * Similar to 'accept' but takes an additional parameter to specify
+ * flags.
+ * This function is a cancellation point and therefore not marked
+ * with __THROW.
+ * */
+int
+vcom_accept4 (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int __flags)
+{
+
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_socket_accept4 (__fd, __addr, __addr_len, __flags);
+}
+
+int
+accept4 (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int __flags)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ rv = vcom_accept4 (__fd, __addr, __addr_len, __flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] accept4: "
+ "'%04d'='%04d', '%p', '%p', '%04x'\n",
+ pid, rv, __fd, __addr, __addr_len, __flags);
+ if (VCOM_DEBUG > 0)
+ vcom_socket_main_show ();
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+ }
+ return libc_accept4 (__fd, __addr, __addr_len, __flags);
+}
+
+#endif
+
+/*
+ * Shut down all or part of the connection open on socket FD.
+ * HOW determines what to shut down:
+ * SHUT_RD = No more receptions;
+ * SHUT_WR = No more transmissions;
+ * SHUT_RDWR = No more receptions or transmissions.
+ * Returns 0 on success, -1 for errors.
+ * */
+int
+vcom_shutdown (int __fd, int __how)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+ return vcom_socket_shutdown (__fd, __how);
+}
+
+int
+shutdown (int __fd, int __how)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_shutdown (__fd, __how);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] shutdown: "
+ "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ return libc_shutdown (__fd, __how);
+}
+
+int
+vcom_epoll_create (int __size)
+{
+
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ if (__size <= 0)
+ {
+ return -EINVAL;
+ }
+
+ /* __size argument is ignored "thereafter" */
+ return vcom_epoll_create1 (0);
+}
+
+/*
+ * __size argument is ignored, but must be greater than zero
+ */
+int
+epoll_create (int __size)
+{
+ int rv = 0;
+ pid_t pid = getpid ();
+
+ rv = vcom_epoll_create (__size);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] epoll_create: " "'%04d'='%04d'\n", pid, rv, __size);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+}
+
+int
+vcom_epoll_create1 (int __flags)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ if (__flags < 0)
+ {
+ return -EINVAL;
+ }
+ if (__flags & ~EPOLL_CLOEXEC)
+ {
+ return -EINVAL;
+ }
+ /* __flags can be either zero or EPOLL_CLOEXEC */
+ /* implementation */
+ return vcom_socket_epoll_create1 (__flags);
+}
+
+/*
+ * __flags can be either zero or EPOLL_CLOEXEC
+ * */
+int
+epoll_create1 (int __flags)
+{
+ int rv = 0;
+ pid_t pid = getpid ();
+
+ rv = vcom_epoll_create1 (__flags);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] epoll_create: " "'%04d'='%08x'\n", pid, rv, __flags);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+}
+
+static inline int
+ep_op_has_event (int op)
+{
+ return op != EPOLL_CTL_DEL;
+}
+
+int
+vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ /*
+ * the requested operation __op is not supported
+ * by this interface */
+ if (!((__op == EPOLL_CTL_ADD) ||
+ (__op == EPOLL_CTL_MOD) || (__op == EPOLL_CTL_DEL)))
+ {
+ return -EINVAL;
+ }
+
+ /* op is ADD or MOD but event parameter is NULL */
+ if ((ep_op_has_event (__op) && !__event))
+ {
+ return -EFAULT;
+ }
+
+ /* fd is same as epfd */
+ /* do not permit adding an epoll file descriptor inside itself */
+ if (__epfd == __fd)
+ {
+ return -EINVAL;
+ }
+
+ /* implementation */
+ return vcom_socket_epoll_ctl (__epfd, __op, __fd, __event);
+}
+
+/*
+ * implement the controller interface for epoll
+ * that enables the insertion/removal/change of
+ * file descriptors inside the interest set.
+ */
+int
+epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (is_vcom_epfd (__epfd))
+ {
+ /* TBD: currently limiting epoll to support only vcom fds */
+ if (is_vcom_socket_fd (__fd))
+ {
+ rv = vcom_epoll_ctl (__epfd, __op, __fd, __event);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] epoll_ctl: "
+ "'%04d'='%04d', '%04d', '%04d'\n",
+ pid, rv, __epfd, __op, __fd);
+ if (rv != 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return 0;
+ }
+ else
+ {
+ /*
+ * TBD: currently epoll does not support kernel fds
+ * or epoll fds */
+ errno = EBADF;
+ return -1;
+ }
+ }
+ else
+ {
+ /* epfd is not an epoll file descriptor */
+ errno = EINVAL;
+ return -1;
+ }
+ return 0;
+}
+
+int
+vcom_epoll_wait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return vcom_epoll_pwait (__epfd, __events, __maxevents, __timeout, NULL);
+}
+
+int
+epoll_wait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (is_vcom_epfd (__epfd))
+ {
+ rv = vcom_epoll_wait (__epfd, __events, __maxevents, __timeout);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] epoll_wait: "
+ "'%04d'='%04d', '%p', "
+ "'%04d', '%04d'\n",
+ pid, rv, __epfd, __events, __maxevents, __timeout);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+ }
+ else
+ {
+ errno = EINVAL;
+ return -1;
+ }
+ return 0;
+}
+
+
+int
+vcom_epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout, const __sigset_t * __ss)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ /* implementation */
+ return vcom_socket_epoll_pwait (__epfd, __events,
+ __maxevents, __timeout, __ss);
+}
+
+int
+epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout, const __sigset_t * __ss)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (is_vcom_epfd (__epfd))
+ {
+ rv = vcom_epoll_pwait (__epfd, __events, __maxevents, __timeout, __ss);
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] epoll_pwait: "
+ "'%04d'='%04d', '%p', "
+ "'%04d', '%04d', "
+ "'%p'\n",
+ pid, rv, __epfd, __events, __maxevents, __timeout, __ss);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+ }
+ else
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Poll the file descriptors described by the NFDS structures starting at
+ FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for
+ an event to occur; if TIMEOUT is -1, block until an event occurs.
+ Returns the number of file descriptors with events, zero if timed out,
+ or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+
+int
+vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
+{
+ int rv = 0;
+ pid_t pid = getpid ();
+
+ struct rlimit nofile_limit;
+ struct pollfd vcom_fds[MAX_POLL_NFDS_DEFAULT];
+ nfds_t fds_idx = 0;
+
+ /* actual set of file descriptors to be monitored */
+ nfds_t libc_nfds = 0;
+ nfds_t vcom_nfds = 0;
+
+ /* ready file descriptors
+ *
+ * number of structures which have nonzero revents fields
+ * in other words, descriptors with events or errors reported.
+ * */
+ /* after call to libc_poll () */
+ int rlibc_nfds = 0;
+ /* after call to vcom_socket_poll () */
+ int rvcom_nfds = 0;
+
+
+ /* timeout value in units of timespec */
+ struct timespec timeout_ts;
+ struct timespec start_time, now, end_time;
+
+
+ /* get start_time */
+ rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
+ if (rv == -1)
+ {
+ rv = -errno;
+ goto poll_done;
+ }
+
+ /* set timeout_ts & end_time */
+ if (__timeout >= 0)
+ {
+ /* set timeout_ts */
+ timeout_ts.tv_sec = __timeout / MSEC_PER_SEC;
+ timeout_ts.tv_nsec = (__timeout % MSEC_PER_SEC) * NSEC_PER_MSEC;
+ set_normalized_timespec (&timeout_ts,
+ timeout_ts.tv_sec, timeout_ts.tv_nsec);
+ /* set end_time */
+ if (__timeout)
+ {
+ end_time = timespec_add (start_time, timeout_ts);
+ }
+ else
+ {
+ end_time = start_time;
+ }
+ }
+
+ if (vcom_init () != 0)
+ {
+ rv = -1;
+ goto poll_done;
+ }
+
+ /* validate __fds */
+ if (!__fds)
+ {
+ rv = -EFAULT;
+ goto poll_done;
+ }
+
+ /* validate __nfds */
+ /*TBD: call getrlimit once when vcl-ldpreload library is init */
+ rv = getrlimit (RLIMIT_NOFILE, &nofile_limit);
+ if (rv != 0)
+ {
+ rv = -errno;
+ goto poll_done;
+ }
+ if (__nfds >= nofile_limit.rlim_cur)
+ {
+ rv = -EINVAL;
+ goto poll_done;
+ }
+
+ /*
+ * for the POC, it's fair to assume that nfds is less than 1024
+ * */
+ if (__nfds >= MAX_POLL_NFDS_DEFAULT)
+ {
+ rv = -EINVAL;
+ goto poll_done;
+ }
+
+ /* set revents field (output parameter)
+ * to zero
+ * */
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ __fds[fds_idx].revents = 0;
+ }
+
+#if 0
+ /* set revents field (output parameter)
+ * to zero for user ignored fds
+ * */
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /*
+ * if negative fd, ignore events field
+ * and set output parameter (revents field) to zero */
+ if (__fds[fds_idx].fd < 0)
+ {
+ __fds[fds_idx].revents = 0;
+ }
+ }
+#endif
+
+ /*
+ * 00. prepare __fds and vcom_fds for polling
+ * copy __fds to vcom_fds
+ * 01. negate all except libc fds in __fds,
+ * ignore user negated fds
+ * 02. negate all except vcom_fds in vocm fds,
+ * ignore user negated fds
+ * ignore fd 0 by setting it to negative number
+ * */
+ memcpy (vcom_fds, __fds, sizeof (*__fds) * __nfds);
+ libc_nfds = 0;
+ vcom_nfds = 0;
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /*
+ * 00. ignore vcom fds in __fds
+ * 01. ignore libc fds in vcom_fds,
+ * ignore fd 0 by setting it to negative number.
+ * as fd 0 cannot be ignored.
+ */
+ if (is_vcom_socket_fd (__fds[fds_idx].fd) ||
+ is_vcom_epfd (__fds[fds_idx].fd))
+ {
+ __fds[fds_idx].fd = -__fds[fds_idx].fd;
+ vcom_nfds++;
+ }
+ else
+ {
+ libc_nfds++;
+ /* ignore fd 0 by setting it to negative number */
+ if (!vcom_fds[fds_idx].fd)
+ {
+ vcom_fds[fds_idx].fd = -1;
+ }
+ vcom_fds[fds_idx].fd = -vcom_fds[fds_idx].fd;
+ }
+ }
+
+ /*
+ * polling loop
+ *
+ * poll on libc fds and vcom fds
+ *
+ * specifying a timeout of zero causes libc_poll() and
+ * vcom_socket_poll() to return immediately, even if no
+ * file descriptors are ready
+ * */
+ do
+ {
+ rlibc_nfds = 0;
+ rvcom_nfds = 0;
+
+ /*
+ * timeout parameter for libc_poll () set to zero
+ * to poll on libc fds
+ * */
+
+ /* poll on libc fds */
+ if (libc_nfds)
+ {
+ /*
+ * a timeout of zero causes libc_poll()
+ * to return immediately
+ * */
+ rlibc_nfds = libc_poll (__fds, __nfds, 0);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d] poll libc: "
+ "'%04d'='%08lu'\n", pid, rlibc_nfds, __nfds);
+
+ if (rlibc_nfds < 0)
+ {
+ rv = -errno;
+ goto poll_done_update_nfds;
+ }
+ }
+
+ /*
+ * timeout parameter for vcom_socket_poll () set to zero
+ * to poll on vcom fds
+ * */
+
+ /* poll on vcom fds */
+ if (vcom_nfds)
+ {
+ /*
+ * a timeout of zero causes vcom_socket_poll()
+ * to return immediately
+ * */
+ rvcom_nfds = vcom_socket_poll (vcom_fds, __nfds, 0);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d] poll vcom: "
+ "'%04d'='%08lu'\n", pid, rvcom_nfds, __nfds);
+ if (rvcom_nfds < 0)
+ {
+ rv = rvcom_nfds;
+ goto poll_done_update_nfds;
+ }
+ }
+
+ /* check if any file descriptors changed status */
+ if ((libc_nfds && rlibc_nfds > 0) || (vcom_nfds && rvcom_nfds > 0))
+ {
+ /* something interesting happened */
+ rv = rlibc_nfds + rvcom_nfds;
+ goto poll_done_update_nfds;
+ }
+
+ rv = clock_gettime (CLOCK_MONOTONIC, &now);
+ if (rv == -1)
+ {
+ rv = -errno;
+ goto poll_done_update_nfds;
+ }
+ }
+
+ /* block indefinitely || timeout elapsed */
+ while ((__timeout < 0) || timespec_compare (&now, &end_time) < 0);
+
+ /* timeout expired before anything interesting happened */
+ rv = 0;
+
+poll_done_update_nfds:
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds in vcom_fds
+ * 00. user negated fds
+ * 01. libc fds
+ * */
+ if (vcom_fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /* from here on handle positive vcom fds */
+ /*
+ * restore vcom fds to positive number in __fds
+ * and update revents in __fds with the events
+ * that actually occurred in vcom fds
+ * */
+ __fds[fds_idx].fd = -__fds[fds_idx].fd;
+ if (rvcom_nfds)
+ {
+ __fds[fds_idx].revents = vcom_fds[fds_idx].revents;
+ }
+ }
+
+poll_done:
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] vpoll: " "'%04d'='%08lu'\n", pid, rv, __nfds);
+ return rv;
+}
+
+/*
+ * 00. The field __fds[i].fd contains a file descriptor for an
+ * open file.
+ * If this field is negative, then the corresponding
+ * events field is ignored and the revents field returns zero.
+ * The field __fds[i].events is an input parameter.
+ * The field __fds[i].revents is an output parameter.
+ * 01. Specifying a negative value in timeout
+ * means an infinite timeout.
+ * Specifying a timeout of zero causes poll() to return
+ * immediately, even if no file descriptors are ready.
+ *
+ * NOTE: observed __nfds is less than 128 from kubecon strace files
+ */
+
+
+int
+poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
+{
+ int rv = 0;
+ pid_t pid = getpid ();
+
+
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] poll1: " "'%04d'='%08lu, %d, 0x%x'\n",
+ pid, rv, __nfds, __fds[0].fd, __fds[0].events);
+ rv = vcom_poll (__fds, __nfds, __timeout);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] poll2: " "'%04d'='%08lu, %d, 0x%x'\n",
+ pid, rv, __nfds, __fds[0].fd, __fds[0].revents);
+ if (rv < 0)
+ {
+ errno = -rv;
+ return -1;
+ }
+ return rv;
+}
+
+#ifdef __USE_GNU
+/* Like poll, but before waiting the threads signal mask is replaced
+ with that specified in the fourth parameter. For better usability,
+ the timeout value is specified using a TIMESPEC object.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+int
+vcom_ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss)
+{
+ if (vcom_init () != 0)
+ {
+ return -1;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+int
+ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss)
+{
+ int rv = 0;
+
+ errno = EOPNOTSUPP;
+ rv = -1;
+ return rv;
+}
+#endif
+
+void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void);
+
+void DESTRUCTOR_ATTRIBUTE vcom_destructor (void);
+
+void
+vcom_constructor (void)
+{
+ pid_t pid = getpid ();
+
+ swrap_constructor ();
+ if (vcom_init () != 0)
+ {
+ printf ("\n[%d] vcom_constructor...failed!\n", pid);
+ }
+ else
+ {
+ printf ("\n[%d] vcom_constructor...done!\n", pid);
+ }
+}
+
+/*
+ * This function is called when the library is unloaded
+ */
+void
+vcom_destructor (void)
+{
+ pid_t pid = getpid ();
+
+ vcom_destroy ();
+ swrap_destructor ();
+ printf ("\n[%d] vcom_destructor...done!\n", pid);
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vcom.h b/src/vcl/vcom.h
new file mode 100644
index 00000000000..da9dc5f7bf4
--- /dev/null
+++ b/src/vcl/vcom.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vcom_h
+#define included_vcom_h
+
+/* VCOM DEBUG flag. Setting this to 1 or 0 turns off
+ ASSERT & other debugging code. */
+#ifndef VCOM_DEBUG
+#define VCOM_DEBUG 0
+#endif
+
+#include <vcl/vcom_glibc_socket.h>
+
+#define MAX_VCOM_APP_NAME 256
+
+/* Returns 0 on success or -1 on error. */
+extern int vcom_set_app_name (char *__app_name);
+
+/*
+ *
+ * File descriptor based APIs
+ *
+ */
+
+/*
+ * vpp implementation of glibc APIs from <unistd.h>
+ */
+extern int vcom_close (int __fd);
+
+extern ssize_t __wur vcom_read (int __fd, void *__buf, size_t __nbytes);
+
+extern ssize_t __wur vcom_write (int __fd, const void *__buf, size_t __n);
+
+extern ssize_t __wur vcom_readv (int __fd, const struct iovec *__iov,
+ int __iovcnt);
+
+extern ssize_t __wur vcom_writev (int __fd, const struct iovec *__iov,
+ int __iovcnt);
+
+/*
+ * vpp implementation of glibc APIs from <fcntl.h>
+ */
+extern int vcom_fcntl (int __fd, int __cmd, ...);
+
+/*
+ * VPP implementation of glibc APIs ioctl
+ */
+extern int vcom_ioctl (int __fd, unsigned long int __cmd, ...);
+
+/*
+ * vpp implementation of glibc APIs from <sys/select.h>
+ */
+extern int
+vcom_select (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval *__restrict __timeout);
+
+#ifdef __USE_XOPEN2K
+extern int
+vcom_pselect (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec *__restrict __timeout,
+ const __sigset_t * __restrict __sigmask);
+#endif
+
+/*
+ * vpp implementation of glibc APIs from <sys/socket.h>
+ */
+extern int __THROW vcom_socket (int __domain, int __type, int __protocol);
+
+/* On Linux, the only supported domain for this call is AF_UNIX
+* (or synonymously, AF_LOCAL). Most implementations have the
+* same restriction.
+* vpp does not implement AF_UNIX domain in this release.
+* */
+extern int __THROW
+vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]);
+
+extern int __THROW
+vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len);
+
+extern int __THROW
+vcom_getsockname (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len);
+
+extern int
+vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len);
+
+extern int __THROW
+vcom_getpeername (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len);
+
+extern ssize_t
+vcom_send (int __fd, const void *__buf, size_t __n, int __flags);
+
+extern ssize_t vcom_recv (int __fd, void *__buf, size_t __n, int __flags);
+
+extern ssize_t
+vcom_sendto (int __fd, const void *__buf, size_t __n,
+ int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len);
+
+extern ssize_t
+vcom_recvfrom (int __fd, void *__restrict __buf,
+ size_t __n, int __flags,
+ __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len);
+
+extern ssize_t
+vcom_sendmsg (int __fd, const struct msghdr *__message, int __flags);
+
+#ifdef __USE_GNU
+extern int
+sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags);
+#endif
+
+extern ssize_t vcom_recvmsg (int __fd, struct msghdr *__message, int __flags);
+
+#ifdef __USE_GNU
+extern int
+vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags, struct timespec *__tmo);
+#endif
+
+extern int __THROW
+vcom_getsockopt (int __fd, int __level, int __optname,
+ void *__restrict __optval, socklen_t * __restrict __optlen);
+
+extern int __THROW
+vcom_setsockopt (int __fd, int __level, int __optname,
+ const void *__optval, socklen_t __optlen);
+
+extern int __THROW vcom_listen (int __fd, int __n);
+
+extern int
+vcom_accept (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len);
+
+#ifdef __USE_GNU
+/*
+ * Similar to 'accept' but takes an additional parameter to specify
+ * flags.
+ * */
+/* TBD: implemented later */
+extern int
+vcom_accept4 (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int __flags);
+#endif
+
+extern int __THROW vcom_shutdown (int __fd, int __how);
+
+extern int __THROW vcom_epoll_create (int __size);
+
+extern int __THROW vcom_epoll_create1 (int __flags);
+
+extern int __THROW
+vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event);
+
+extern int
+vcom_epoll_wait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout);
+
+extern int
+vcom_epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout, const __sigset_t * __ss);
+
+/*
+ * NOTE: observed __nfds is less than 128 from kubecon strace files
+ * for the POC, it's fair to assume that nfds is less than 1024.
+ * TBD: make it thread safe and design to scale.
+ * */
+#define MAX_POLL_NFDS_DEFAULT 1024
+extern int vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
+
+#ifdef __USE_GNU
+extern int
+vcom_ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss);
+#endif
+
+
+#endif /* included_vcom_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vcom_glibc_socket.h b/src/vcl/vcom_glibc_socket.h
new file mode 100644
index 00000000000..4eb60fb65c7
--- /dev/null
+++ b/src/vcl/vcom_glibc_socket.h
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vcom_glibc_socket_h
+#define included_vcom_glibc_socket_h
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+
+#include <sys/epoll.h>
+
+#include <signal.h>
+#include <poll.h>
+
+/*
+ *
+ * Generic glibc fd api
+ *
+ */
+/*
+ * glibc APIs from <unistd.h>
+ */
+
+/* Close the file descriptor FD.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int close (int __fd);
+
+/* Read NBYTES into BUF from FD. Return the
+ number read, -1 for errors or 0 for EOF.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t __wur read (int __fd, void *__buf, size_t __nbytes);
+
+/* Write N bytes of BUF to FD. Return the number written, or -1.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t __wur write (int __fd, const void *__buf, size_t __n);
+
+
+/*
+ * glibc APIs from <fcntl.h>
+ */
+
+/* Do the file control operation described by CMD on FD.
+ The remaining arguments are interpreted depending on CMD.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int fcntl (int __fd, int __cmd, ...);
+
+
+/*
+ * glibc APIs from <sys/select.h>
+ */
+
+/* Check the first NFDS descriptors each in READFDS (if not NULL) for read
+ readiness, in WRITEFDS (if not NULL) for write readiness, and in EXCEPTFDS
+ (if not NULL) for exceptional conditions. If TIMEOUT is not NULL, time out
+ after waiting the interval specified therein. Returns the number of ready
+ descriptors, or -1 for errors.
+
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+select (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval *__restrict __timeout);
+
+#ifdef __USE_XOPEN2K
+/* Same as above only that the TIMEOUT value is given with higher
+ resolution and a sigmask which is been set temporarily. This version
+ should be used.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+pselect (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec *__restrict __timeout,
+ const __sigset_t * __restrict __sigmask);
+#endif
+
+
+/*
+ *
+ * Socket specific glibc api
+ *
+ */
+
+/*
+ * glibc APIs from <sys/socket.h>
+ */
+
+/* Create a new socket of type TYPE in domain DOMAIN, using
+ protocol PROTOCOL. If PROTOCOL is zero, one is chosen automatically.
+ Returns a file descriptor for the new socket, or -1 for errors. */
+extern int __THROW socket (int __domain, int __type, int __protocol);
+
+/* Create two new sockets, of type TYPE in domain DOMAIN and using
+ protocol PROTOCOL, which are connected to each other, and put file
+ descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
+ one will be chosen automatically. Returns 0 on success, -1 for errors. */
+extern int __THROW
+socketpair (int __domain, int __type, int __protocol, int __fds[2]);
+
+/* Give the socket FD the local address ADDR (which is LEN bytes long). */
+extern int __THROW
+bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len);
+
+/* Put the local address of FD into *ADDR and its length in *LEN. */
+extern int __THROW
+getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len);
+
+/* Open a connection on socket FD to peer at ADDR (which LEN bytes long).
+ For connectionless socket types, just set the default address to send to
+ and the only address from which to accept transmissions.
+ Return 0 on success, -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len);
+
+/* Put the address of the peer connected to socket FD into *ADDR
+ (which is *LEN bytes long), and its actual length into *LEN. */
+extern int __THROW
+getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len);
+
+/* Send N bytes of BUF to socket FD. Returns the number sent or -1.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t send (int __fd, const void *__buf, size_t __n, int __flags);
+
+/* Read N bytes into BUF from socket FD.
+ Returns the number read or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t recv (int __fd, void *__buf, size_t __n, int __flags);
+
+/* Send N bytes of BUF on socket FD to peer at address ADDR (which is
+ ADDR_LEN bytes long). Returns the number sent, or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t
+sendto (int __fd, const void *__buf, size_t __n,
+ int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len);
+
+/* Read N bytes into BUF through socket FD.
+ If ADDR is not NULL, fill in *ADDR_LEN bytes of it with tha address of
+ the sender, and store the actual size of the address in *ADDR_LEN.
+ Returns the number of bytes read or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t
+recvfrom (int __fd, void *__restrict __buf,
+ size_t __n, int __flags,
+ __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len);
+
+/* Send a message described MESSAGE on socket FD.
+ Returns the number of bytes sent, or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t
+sendmsg (int __fd, const struct msghdr *__message, int __flags);
+
+#ifdef __USE_GNU
+/* Send a VLEN messages as described by VMESSAGES to socket FD.
+ Returns the number of datagrams successfully written or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags);
+#endif
+
+/* Receive a message as described by MESSAGE from socket FD.
+ Returns the number of bytes read or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t recvmsg (int __fd, struct msghdr *__message, int __flags);
+
+#ifdef __USE_GNU
+/* Receive up to VLEN messages as described by VMESSAGES from socket FD.
+ Returns the number of messages received or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+recvmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags, struct timespec *__tmo);
+#endif
+
+
+/* Put the current value for socket FD's option OPTNAME at protocol level LEVEL
+ into OPTVAL (which is *OPTLEN bytes long), and set *OPTLEN to the value's
+ actual length. Returns 0 on success, -1 for errors. */
+extern int __THROW
+getsockopt (int __fd, int __level, int __optname,
+ void *__restrict __optval, socklen_t * __restrict __optlen);
+
+/* Set socket FD's option OPTNAME at protocol level LEVEL
+ to *OPTVAL (which is OPTLEN bytes long).
+ Returns 0 on success, -1 for errors. */
+extern int __THROW
+setsockopt (int __fd, int __level, int __optname,
+ const void *__optval, socklen_t __optlen);
+
+/* Prepare to accept connections on socket FD.
+ N connection requests will be queued before further requests are refused.
+ Returns 0 on success, -1 for errors. */
+extern int __THROW listen (int __fd, int __n);
+
+/* Await a connection on socket FD.
+ When a connection arrives, open a new socket to communicate with it,
+ set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting
+ peer and *ADDR_LEN to the address's actual length, and return the
+ new socket's descriptor, or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len);
+
+#ifdef __USE_GNU
+/* Similar to 'accept' but takes an additional parameter to specify flags.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+ /* TBD: implemented later */
+extern int
+accept4 (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int __flags);
+#endif
+
+/* Shut down all or part of the connection open on socket FD.
+ HOW determines what to shut down:
+ SHUT_RD = No more receptions;
+ SHUT_WR = No more transmissions;
+ SHUT_RDWR = No more receptions or transmissions.
+ Returns 0 on success, -1 for errors. */
+extern int __THROW shutdown (int __fd, int __how);
+
+
+/*
+ * glibc APIs from <sys/epoll.h>
+ */
+
+/* Creates an epoll instance. Returns an fd for the new instance.
+ The "size" parameter is a hint specifying the number of file
+ descriptors to be associated with the new instance. The fd
+ returned by epoll_create() should be closed with close(). */
+extern int __THROW epoll_create (int __size);
+
+/* Same as epoll_create but with an FLAGS parameter. The unused SIZE
+ parameter has been dropped. */
+extern int __THROW epoll_create1 (int __flags);
+
+/* Manipulate an epoll instance "epfd". Returns 0 in case of success,
+ -1 in case of error ( the "errno" variable will contain the
+ specific error code ) The "op" parameter is one of the EPOLL_CTL_*
+ constants defined above. The "fd" parameter is the target of the
+ operation. The "event" parameter describes which events the caller
+ is interested in and any associated user data. */
+extern int __THROW
+epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event);
+
+#define EP_INT_MAX ((int)(~0U>>1))
+#define EP_MAX_EVENTS (EP_INT_MAX / sizeof(struct epoll_event))
+
+/* Wait for events on an epoll instance "epfd". Returns the number of
+ triggered events returned in "events" buffer. Or -1 in case of
+ error with the "errno" variable set to the specific error code. The
+ "events" parameter is a buffer that will contain triggered
+ events. The "maxevents" is the maximum number of events to be
+ returned ( usually size of "events" ). The "timeout" parameter
+ specifies the maximum wait time in milliseconds (-1 == infinite).
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+epoll_wait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout);
+
+/* Same as epoll_wait, but the thread's signal mask is temporarily
+ and atomically replaced with the one provided as parameter.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int
+epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout, const __sigset_t * __ss);
+
+/* Poll the file descriptors described by the NFDS structures starting at
+ FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for
+ an event to occur; if TIMEOUT is -1, block until an event occurs.
+ Returns the number of file descriptors with events, zero if timed out,
+ or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
+
+#ifdef __USE_GNU
+/* Like poll, but before waiting the threads signal mask is replaced
+ with that specified in the fourth parameter. For better usability,
+ the timeout value is specified using a TIMESPEC object.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern int ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss);
+#endif
+
+
+#endif /* included_vcom_glibc_socket_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vcom_socket.c b/src/vcl/vcom_socket.c
new file mode 100644
index 00000000000..8806ebd011a
--- /dev/null
+++ b/src/vcl/vcom_socket.c
@@ -0,0 +1,3443 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/uio.h>
+#include <limits.h>
+#define __need_IOV_MAX
+#include <bits/stdio_lim.h>
+#include <netinet/tcp.h>
+
+#include <vppinfra/types.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pool.h>
+
+#include <vcl/vcom_socket.h>
+#include <vcl/vcom_socket_wrapper.h>
+#include <vcl/vcom.h>
+
+#include <vcl/vppcom.h>
+
+
+/*
+ * VCOM_SOCKET Private definitions and functions.
+ */
+
+typedef struct vcom_socket_main_t_
+{
+ u8 init;
+
+ /* vcom_socket pool */
+ vcom_socket_t *vsockets;
+
+ /* Hash table for socketidx to fd mapping */
+ uword *sockidx_by_fd;
+
+ /* vcom_epoll pool */
+ vcom_epoll_t *vepolls;
+
+ /* Hash table for epollidx to epfd mapping */
+ uword *epollidx_by_epfd;
+
+
+ /* common epitem poll for all epfd */
+ /* TBD: epitem poll per epfd */
+ /* vcom_epitem pool */
+ vcom_epitem_t *vepitems;
+
+ /* Hash table for epitemidx to epfdfd mapping */
+ uword *epitemidx_by_epfdfd;
+
+ /* Hash table - key:epfd, value:vec of epitemidx */
+ uword *epitemidxs_by_epfd;
+ /* Hash table - key:fd, value:vec of epitemidx */
+ uword *epitemidxs_by_fd;
+
+} vcom_socket_main_t;
+
+vcom_socket_main_t vcom_socket_main;
+
+
+static int
+vcom_socket_open_socket (int domain, int type, int protocol)
+{
+ int rv = -1;
+
+ /* handle domains implemented by vpp */
+ switch (domain)
+ {
+ case AF_INET:
+ case AF_INET6:
+ /* get socket type and
+ * handle the socket types supported by vpp */
+ switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ {
+ case SOCK_STREAM:
+ case SOCK_DGRAM:
+ /* the type argument serves a second purpose,
+ * in addition to specifying a socket type,
+ * it may include the bitwise OR of any of
+ * SOCK_NONBLOCK and SOCK_CLOEXEC, to modify
+ * the behavior of socket. */
+ rv = libc_socket (domain, type, protocol);
+ if (rv == -1)
+ rv = -errno;
+ break;
+
+ default:
+ break;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ return rv;
+}
+
+static int
+vcom_socket_open_epoll (int flags)
+{
+ int rv = -1;
+
+ if (flags < 0)
+ {
+ return -EINVAL;
+ }
+ if (flags && (flags & ~EPOLL_CLOEXEC))
+ {
+ return -EINVAL;
+ }
+
+ /* flags can be either zero or EPOLL_CLOEXEC */
+ rv = libc_epoll_create1 (flags);
+ if (rv == -1)
+ rv = -errno;
+
+ return rv;
+}
+
+static int
+vcom_socket_close_socket (int fd)
+{
+ int rv;
+
+ rv = libc_close (fd);
+ if (rv == -1)
+ rv = -errno;
+
+ return rv;
+}
+
+static int
+vcom_socket_close_epoll (int epfd)
+{
+ int rv;
+
+ rv = libc_close (epfd);
+ if (rv == -1)
+ rv = -errno;
+
+ return rv;
+}
+
+/*
+ * Public API functions
+ */
+
+
+int
+vcom_socket_is_vcom_fd (int fd)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, fd);
+
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
+ return 1;
+ }
+ return 0;
+}
+
+int
+vcom_socket_is_vcom_epfd (int epfd)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_epoll_t *vepoll;
+
+ p = hash_get (vsm->epollidx_by_epfd, epfd);
+
+ if (p)
+ {
+ vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
+ if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
+ return 1;
+ }
+ return 0;
+}
+
+static inline int
+vcom_socket_get_sid (int fd)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, fd);
+
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
+ return vsock->sid;
+ }
+ return INVALID_SESSION_ID;
+}
+
+static inline int
+vcom_socket_get_vep_idx (int epfd)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_epoll_t *vepoll;
+
+ p = hash_get (vsm->epollidx_by_epfd, epfd);
+
+ if (p)
+ {
+ vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
+ if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
+ return vepoll->vep_idx;
+ }
+ return INVALID_VEP_IDX;
+}
+
+static inline int
+vcom_socket_get_sid_and_vsock (int fd, vcom_socket_t ** vsockp)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, fd);
+
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
+ {
+ *vsockp = vsock;
+ return vsock->sid;
+ }
+ }
+ return INVALID_SESSION_ID;
+}
+
+static inline int
+vcom_socket_get_vep_idx_and_vepoll (int epfd, vcom_epoll_t ** vepollp)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_epoll_t *vepoll;
+
+ p = hash_get (vsm->epollidx_by_epfd, epfd);
+
+ if (p)
+ {
+ vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
+ if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
+ {
+ *vepollp = vepoll;
+ return vepoll->vep_idx;
+ }
+ }
+ return INVALID_VEP_IDX;
+}
+
+
+static int
+vcom_socket_close_vepoll (int epfd)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_epoll_t *vepoll;
+
+ p = hash_get (vsm->epollidx_by_epfd, epfd);
+ if (!p)
+ return -EBADF;
+
+ vepoll = pool_elt_at_index (vsm->vepolls, p[0]);
+ if (!vepoll)
+ return -EBADF;
+
+ if (vepoll->type != EPOLL_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (vepoll->count)
+ {
+ if (!vepoll->close)
+ {
+ vepoll->close = 1;
+ return 0;
+ }
+ else
+ {
+ return -EBADF;
+ }
+ }
+
+ /* count is zero */
+ rv = vppcom_session_close (vepoll->vep_idx);
+ rv = vcom_socket_close_epoll (vepoll->epfd);
+
+ vepoll_init (vepoll);
+ hash_unset (vsm->epollidx_by_epfd, epfd);
+ pool_put (vsm->vepolls, vepoll);
+
+ return rv;
+}
+
+static int
+vcom_socket_close_vsock (int fd)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ vcom_epitem_t *vepitem;
+
+ i32 *vepitemidxs = 0;
+ i32 *vepitemidxs_var = 0;
+
+ p = hash_get (vsm->sockidx_by_fd, fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ rv = vppcom_session_close (vsock->sid);
+ rv = vcom_socket_close_socket (vsock->fd);
+
+ vsocket_init (vsock);
+ hash_unset (vsm->sockidx_by_fd, fd);
+ pool_put (vsm->vsockets, vsock);
+
+ /*
+ * NOTE:
+ * Before calling close(), user should remove
+ * this fd from the epoll-set of all epoll instances,
+ * otherwise resource(epitems) leaks ensues.
+ */
+
+ /*
+ * 00. close all epoll instances that are marked as "close"
+ * of which this fd is the "last" remaining member.
+ * 01. epitems associated with this fd are intentionally
+ * not removed, see NOTE: above.
+ * */
+
+ /* does this fd participate in epoll */
+ p = hash_get (vsm->epitemidxs_by_fd, fd);
+ if (p)
+ {
+ vepitemidxs = *(i32 **) p;
+ vec_foreach (vepitemidxs_var, vepitemidxs)
+ {
+ vepitem = pool_elt_at_index (vsm->vepitems, vepitemidxs_var[0]);
+ if (vepitem && vepitem->fd == fd &&
+ vepitem->type == FD_TYPE_VCOM_SOCKET)
+ {
+ i32 vep_idx;
+ vcom_epoll_t *vepoll;
+ if ((vep_idx =
+ vcom_socket_get_vep_idx_and_vepoll (vepitem->epfd,
+ &vepoll)) !=
+ INVALID_VEP_IDX)
+ {
+ if (vepoll->close)
+ {
+ if (vepoll->count == 1)
+ {
+ /*
+ * force count to zero and
+ * close this epoll instance
+ * */
+ vepoll->count = 0;
+ vcom_socket_close_vepoll (vepoll->epfd);
+ }
+ else
+ {
+ vepoll->count -= 1;
+ }
+ }
+ }
+ }
+
+ }
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_close (int __fd)
+{
+ int rv;
+
+ if (vcom_socket_is_vcom_fd (__fd))
+ {
+ rv = vcom_socket_close_vsock (__fd);
+ }
+ else if (vcom_socket_is_vcom_epfd (__fd))
+ {
+ rv = vcom_socket_close_vepoll (__fd);
+ }
+ else
+ {
+ rv = -EBADF;
+ }
+
+ return rv;
+}
+
+ssize_t
+vcom_socket_read (int __fd, void *__buf, size_t __nbytes)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (!__buf)
+ {
+ return -EINVAL;
+ }
+
+ rv = vcom_fcntl (__fd, F_GETFL, 0);
+ if (rv < 0)
+ {
+ return rv;
+
+ }
+
+ /* is blocking */
+ if (!(rv & O_NONBLOCK))
+ {
+ do
+ {
+ rv = vppcom_session_read (vsock->sid, __buf, __nbytes);
+ }
+ while (rv == -EAGAIN || rv == -EWOULDBLOCK);
+ return rv;
+ }
+ /* The file descriptor refers to a socket and has been
+ * marked nonblocking(O_NONBLOCK) and the read would
+ * block.
+ * */
+ /* is non blocking */
+ rv = vppcom_session_read (vsock->sid, __buf, __nbytes);
+ return rv;
+}
+
+ssize_t
+vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt)
+{
+ int rv;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+ ssize_t total = 0, len = 0;
+ int i;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX)
+ return -EINVAL;
+
+ /* Sanity check */
+ for (i = 0; i < __iovcnt; ++i)
+ {
+ if (SSIZE_MAX - len < __iov[i].iov_len)
+ return -EINVAL;
+ len += __iov[i].iov_len;
+ }
+
+ rv = vcom_fcntl (__fd, F_GETFL, 0);
+ if (rv < 0)
+ {
+ return rv;
+ }
+
+ /* is blocking */
+ if (!(rv & O_NONBLOCK))
+ {
+ do
+ {
+ for (i = 0; i < __iovcnt; ++i)
+ {
+ rv = vppcom_session_read (vsock->sid, __iov[i].iov_base,
+ __iov[i].iov_len);
+ if (rv < 0)
+ break;
+ else
+ {
+ total += rv;
+ if (rv < __iov[i].iov_len)
+ /* Read less than buffer provided, no point to continue */
+ break;
+ }
+ }
+ }
+ while ((rv == -EAGAIN || rv == -EWOULDBLOCK) && total == 0);
+ return total;
+ }
+
+ /* is non blocking */
+ for (i = 0; i < __iovcnt; ++i)
+ {
+ rv = vppcom_session_read (vsock->sid, __iov[i].iov_base,
+ __iov[i].iov_len);
+ if (rv < 0)
+ {
+ if (total > 0)
+ break;
+ else
+ {
+ errno = rv;
+ return rv;
+ }
+ }
+ else
+ {
+ total += rv;
+ if (rv < __iov[i].iov_len)
+ /* Read less than buffer provided, no point to continue */
+ break;
+ }
+ }
+ return total;
+}
+
+ssize_t
+vcom_socket_write (int __fd, const void *__buf, size_t __n)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ if (!__buf)
+ {
+ return -EINVAL;
+ }
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ rv = vppcom_session_write (vsock->sid, (void *) __buf, __n);
+ return rv;
+}
+
+ssize_t
+vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt)
+{
+ int rv = -1;
+ ssize_t total = 0;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+ int i;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < __iovcnt; ++i)
+ {
+ rv = vppcom_session_write (vsock->sid, __iov[i].iov_base,
+ __iov[i].iov_len);
+ if (rv < 0)
+ {
+ if (total > 0)
+ break;
+ else
+ return rv;
+ }
+ else
+ total += rv;
+ }
+ return total;
+}
+
+/*
+ * RETURN: 0 - invalid cmd
+ * 1 - cmd not handled by vcom and vppcom
+ * 2 - cmd handled by vcom socket resource
+ * 3 - cmd handled by vppcom
+ * */
+/* TBD: incomplete list of cmd */
+static int
+vcom_socket_check_fcntl_cmd (int __cmd)
+{
+ switch (__cmd)
+ {
+ /*cmd not handled by vcom and vppcom */
+ /* Fallthrough */
+ case F_DUPFD:
+ case F_DUPFD_CLOEXEC:
+ return 1;
+
+ /* cmd handled by vcom socket resource */
+ /* Fallthrough */
+ case F_GETFD:
+ case F_SETFD:
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ case F_GETOWN:
+ case F_SETOWN:
+ return 2;
+
+ /* cmd handled by vcom and vppcom */
+ case F_SETFL:
+ case F_GETFL:
+ return 3;
+
+ /* cmd not handled by vcom and vppcom */
+ default:
+ return 1;
+ }
+ return 0;
+}
+
+static inline int
+vcom_session_fcntl_va (int __sid, int __cmd, va_list __ap)
+{
+ int flags = va_arg (__ap, int);
+ int rv = -EOPNOTSUPP;
+ uint32_t size;
+
+ size = sizeof (flags);
+ if (__cmd == F_SETFL)
+ {
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
+ }
+ else if (__cmd == F_GETFL)
+ {
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_FLAGS, &flags, &size);
+ if (rv == VPPCOM_OK)
+ rv = flags;
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap)
+{
+ int rv = -EBADF;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ switch (vcom_socket_check_fcntl_cmd (__cmd))
+ {
+ /* invalid cmd */
+ case 0:
+ rv = -EBADF;
+ break;
+ /*cmd not handled by vcom and vppcom */
+ case 1:
+ rv = libc_vfcntl (vsock->fd, __cmd, __ap);
+ break;
+ /* cmd handled by vcom socket resource */
+ case 2:
+ rv = libc_vfcntl (vsock->fd, __cmd, __ap);
+ break;
+ /* cmd handled by vppcom */
+ case 3:
+ rv = vcom_session_fcntl_va (vsock->sid, __cmd, __ap);
+ break;
+
+ default:
+ rv = -EINVAL;
+ break;
+ }
+
+ return rv;
+}
+
+/*
+ * RETURN: 0 - invalid cmd
+ * 1 - cmd not handled by vcom and vppcom
+ * 2 - cmd handled by vcom socket resource
+ * 3 - cmd handled by vppcom
+ */
+static int
+vcom_socket_check_ioctl_cmd (unsigned long int __cmd)
+{
+ int rc;
+
+ switch (__cmd)
+ {
+ /* cmd handled by vppcom */
+ case FIONREAD:
+ rc = 3;
+ break;
+
+ /* cmd not handled by vcom and vppcom */
+ default:
+ rc = 1;
+ break;
+ }
+ return rc;
+}
+
+static inline int
+vcom_session_ioctl_va (int __sid, int __cmd, va_list __ap)
+{
+ int rv;
+
+ if (__cmd == FIONREAD)
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_GET_NREAD, 0, 0);
+ else
+ rv = -EOPNOTSUPP;
+ return rv;
+}
+
+int
+vcom_socket_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap)
+{
+ int rv = -EBADF;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ switch (vcom_socket_check_ioctl_cmd (__cmd))
+ {
+ /* Not supported cmd */
+ case 0:
+ rv = -EOPNOTSUPP;
+ break;
+
+ /* cmd not handled by vcom and vppcom */
+ case 1:
+ rv = libc_vioctl (vsock->fd, __cmd, __ap);
+ break;
+
+ /* cmd handled by vcom socket resource */
+ case 2:
+ rv = libc_vioctl (vsock->fd, __cmd, __ap);
+ break;
+
+ /* cmd handled by vppcom */
+ case 3:
+ rv = vcom_session_ioctl_va (vsock->sid, __cmd, __ap);
+ break;
+
+ default:
+ rv = -EINVAL;
+ break;
+ }
+
+ return rv;
+}
+
+static inline int
+vcom_socket_fds_2_sid_fds (
+ /* dest */
+ int *vcom_nsid_fds,
+ fd_set * __restrict vcom_rd_sid_fds,
+ fd_set * __restrict vcom_wr_sid_fds,
+ fd_set * __restrict vcom_ex_sid_fds,
+ /* src */
+ int vcom_nfds,
+ fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds)
+{
+ int rv = 0;
+ int fd;
+ int sid;
+ /* invalid max_sid is -1 */
+ int max_sid = -1;
+ int nsid = 0;
+
+ /*
+ * set sid in sid sets corresponding to fd's in fd sets
+ * compute nsid and vcom_nsid_fds from sid sets
+ */
+
+ for (fd = 0; fd < vcom_nfds; fd++)
+ {
+ /*
+ * F fd set, src
+ * S sid set, dest
+ */
+#define _(S,F) \
+ if ((F) && (S) && FD_ISSET (fd, (F))) \
+ { \
+ sid = vcom_socket_get_sid (fd); \
+ if (sid != INVALID_SESSION_ID) \
+ { \
+ FD_SET (sid, (S)); \
+ if (sid > max_sid) \
+ { \
+ max_sid = sid; \
+ } \
+ ++nsid; \
+ } \
+ else \
+ { \
+ rv = -EBADFD; \
+ goto done; \
+ } \
+ }
+
+
+ _(vcom_rd_sid_fds, vcom_readfds);
+ _(vcom_wr_sid_fds, vcom_writefds);
+ _(vcom_ex_sid_fds, vcom_exceptfds);
+#undef _
+ }
+
+ *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0;
+ rv = nsid;
+
+done:
+ return rv;
+}
+
+/*
+ * PRE: 00. sid sets were derived from fd sets
+ * 01. sid sets were updated with sids that actually changed
+ * status
+ * 02. fd sets still has watched fds
+ *
+ * This function will modify in place fd sets to indicate which fd's
+ * actually changed status(inferred from sid sets)
+ */
+static inline int
+vcom_socket_sid_fds_2_fds (
+ /* dest */
+ int *new_vcom_nfds,
+ int vcom_nfds,
+ fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds,
+ /* src */
+ int vcom_nsid_fds,
+ fd_set * __restrict vcom_rd_sid_fds,
+ fd_set * __restrict vcom_wr_sid_fds,
+ fd_set * __restrict vcom_ex_sid_fds)
+{
+ int rv = 0;
+ int fd;
+ int sid;
+ /* invalid max_fd is -1 */
+ int max_fd = -1;
+ int nfd = 0;
+
+
+ /*
+ * modify in place fd sets to indicate which fd's
+ * actually changed status(inferred from sid sets)
+ */
+ for (fd = 0; fd < vcom_nfds; fd++)
+ {
+ /*
+ * F fd set, dest
+ * S sid set, src
+ */
+#define _(S,F) \
+ if ((F) && (S) && FD_ISSET (fd, (F))) \
+ { \
+ sid = vcom_socket_get_sid (fd); \
+ if (sid != INVALID_SESSION_ID) \
+ { \
+ if (!FD_ISSET (sid, (S))) \
+ { \
+ FD_CLR(fd, (F)); \
+ } \
+ } \
+ else \
+ { \
+ rv = -EBADFD; \
+ goto done; \
+ } \
+ }
+
+
+ _(vcom_rd_sid_fds, vcom_readfds);
+ _(vcom_wr_sid_fds, vcom_writefds);
+ _(vcom_ex_sid_fds, vcom_exceptfds);
+#undef _
+ }
+
+ /*
+ * compute nfd and new_vcom_nfds from fd sets
+ */
+ for (fd = 0; fd < vcom_nfds; fd++)
+ {
+
+#define _(F) \
+ if ((F) && FD_ISSET (fd, (F))) \
+ { \
+ if (fd > max_fd) \
+ { \
+ max_fd = fd; \
+ } \
+ ++nfd; \
+ }
+
+
+ _(vcom_readfds);
+ _(vcom_writefds);
+ _(vcom_exceptfds);
+#undef _
+
+ }
+
+ *new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0;
+ rv = nfd;
+
+done:
+ return rv;
+}
+
+/*
+ * PRE:
+ * vom_socket_select is always called with
+ * timeout->tv_sec and timeout->tv_usec set to zero.
+ * hence vppcom_select return immediately.
+ */
+/*
+ * TBD: do{body;} while(timeout conditional); timeout loop
+ */
+int
+vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds,
+ struct timeval *__restrict timeout)
+{
+ static unsigned long vcom_nsid_fds = 0;
+ int vcom_nsid = 0;
+ int rv = -EBADF;
+ pid_t pid = getpid ();
+
+ int new_vcom_nfds = 0;
+ int new_vcom_nfd = 0;
+
+ /* vcom sid fds */
+ fd_set vcom_rd_sid_fds;
+ fd_set vcom_wr_sid_fds;
+ fd_set vcom_ex_sid_fds;
+
+ /* in seconds eg. 3.123456789 seconds */
+ double time_to_wait = (double) 0;
+
+ /* validate inputs */
+ if (vcom_nfds < 0)
+ {
+ return -EINVAL;
+ }
+
+ /* convert timeval timeout to double time_to_wait */
+ if (timeout)
+ {
+ if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
+ {
+ /* polling: vppcom_select returns immediately */
+ time_to_wait = (double) 0;
+ }
+ else
+ {
+ /*TBD: use timeval api */
+ time_to_wait = (double) timeout->tv_sec +
+ (double) timeout->tv_usec / (double) 1000000 +
+ (double) (timeout->tv_usec % 1000000) / (double) 1000000;
+ }
+ }
+ else
+ {
+ /*
+ * no timeout: vppcom_select can block indefinitely
+ * waiting for a file descriptor to become ready
+ * */
+ /* set to a phantom value */
+ time_to_wait = ~0;
+ }
+
+ /* zero the sid_sets */
+ /*
+ * F fd set
+ * S sid set
+ */
+#define _(S,F) \
+ if ((F)) \
+ { \
+ FD_ZERO ((S)); \
+ }
+
+
+ _(&vcom_rd_sid_fds, vcom_readfds);
+ _(&vcom_wr_sid_fds, vcom_writefds);
+ _(&vcom_ex_sid_fds, vcom_exceptfds);
+#undef _
+
+ if (vcom_nfds == 0)
+ {
+ if (time_to_wait > 0)
+ {
+ if (VCOM_DEBUG > 0)
+ fprintf (stderr,
+ "[%d] vcom_socket_select called to "
+ "emulate delay_ns()!\n", pid);
+ rv = vppcom_select (0, NULL, NULL, NULL, time_to_wait);
+ }
+ else
+ {
+ fprintf (stderr, "[%d] vcom_socket_select called vcom_nfds = 0 "
+ "and invalid time_to_wait (%f)!\n", pid, time_to_wait);
+ }
+ return 0;
+ }
+
+ /* populate read, write and except sid_sets */
+ vcom_nsid = vcom_socket_fds_2_sid_fds (
+ /* dest */
+ vcom_readfds || vcom_writefds
+ || vcom_exceptfds ? (int *)
+ &vcom_nsid_fds : NULL,
+ vcom_readfds ? &vcom_rd_sid_fds :
+ NULL,
+ vcom_writefds ? &vcom_wr_sid_fds :
+ NULL,
+ vcom_exceptfds ? &vcom_ex_sid_fds :
+ NULL,
+ /* src */
+ vcom_nfds,
+ vcom_readfds,
+ vcom_writefds, vcom_exceptfds);
+ if (vcom_nsid < 0)
+ {
+ return vcom_nsid;
+ }
+
+ rv = vppcom_select (vcom_nsid_fds,
+ vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds :
+ NULL,
+ vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds :
+ NULL,
+ vcom_exceptfds ? (unsigned long *) &vcom_ex_sid_fds :
+ NULL, time_to_wait);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr, "[%d] called vppcom_select(): "
+ "'%04d'='%04d'\n", pid, rv, (int) vcom_nsid_fds);
+
+ /* check if any file descriptors changed status */
+ if (rv > 0)
+ {
+ /*
+ * on exit, sets are modified in place to indicate which
+ * file descriptors actually changed status
+ * */
+
+ /*
+ * comply with pre-condition
+ * do not clear vcom fd sets befor calling
+ * vcom_socket_sid_fds_2_fds
+ */
+ new_vcom_nfd = vcom_socket_sid_fds_2_fds (
+ /* dest */
+ &new_vcom_nfds,
+ vcom_nfds,
+ vcom_readfds,
+ vcom_writefds,
+ vcom_exceptfds,
+ /* src */
+ vcom_nsid_fds,
+ vcom_readfds ?
+ &vcom_rd_sid_fds : NULL,
+ vcom_writefds ?
+ &vcom_wr_sid_fds : NULL,
+ vcom_exceptfds ?
+ &vcom_ex_sid_fds : NULL);
+ if (new_vcom_nfd < 0)
+ {
+ return new_vcom_nfd;
+ }
+ if (new_vcom_nfds < 0)
+ {
+ return -EINVAL;
+ }
+ rv = new_vcom_nfd;
+ }
+ return rv;
+}
+
+
+int
+vcom_socket_socket (int __domain, int __type, int __protocol)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ vcom_socket_t *vsock;
+
+ i32 fd;
+ i32 sid;
+ i32 sockidx;
+ u8 is_nonblocking = __type & SOCK_NONBLOCK ? 1 : 0;
+ int type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC);
+
+ fd = vcom_socket_open_socket (__domain, __type, __protocol);
+ if (fd < 0)
+ {
+ rv = fd;
+ goto out;
+ }
+
+ sid = vppcom_session_create (VPPCOM_VRF_DEFAULT,
+ (type == SOCK_DGRAM) ?
+ VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP,
+ is_nonblocking);
+ if (sid < 0)
+ {
+ rv = sid;
+ goto out_close_socket;
+ }
+
+ pool_get (vsm->vsockets, vsock);
+ vsocket_init (vsock);
+
+ sockidx = vsock - vsm->vsockets;
+ hash_set (vsm->sockidx_by_fd, fd, sockidx);
+
+ vsocket_set (vsock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND);
+ return fd;
+
+out_close_socket:
+ vcom_socket_close_socket (fd);
+out:
+ return rv;
+}
+
+int
+vcom_socket_socketpair (int __domain, int __type, int __protocol,
+ int __fds[2])
+{
+/* TBD: */
+ return 0;
+}
+
+int
+vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ vppcom_endpt_t ep;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (!__addr)
+ {
+ return -EINVAL;
+ }
+
+ ep.vrf = VPPCOM_VRF_DEFAULT;
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ if (__len != sizeof (struct sockaddr_in))
+ {
+ return -EINVAL;
+ }
+ ep.is_ip4 = VPPCOM_IS_IP4;
+ ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
+ ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port;
+ break;
+
+ case AF_INET6:
+ if (__len != sizeof (struct sockaddr_in6))
+ {
+ return -EINVAL;
+ }
+ ep.is_ip4 = VPPCOM_IS_IP6;
+ ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr;
+ ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port;
+ break;
+
+ default:
+ return -1;
+ break;
+ }
+
+ rv = vppcom_session_bind (vsock->sid, &ep);
+ return rv;
+}
+
+static inline int
+vcom_session_getsockname (int sid, vppcom_endpt_t * ep)
+{
+ int rv;
+ uint32_t size = sizeof (*ep);
+
+ rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_LCL_ADDR, ep, &size);
+ return rv;
+}
+
+int
+vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (!__addr || !__len)
+ return -EFAULT;
+
+ vppcom_endpt_t ep;
+ ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
+ rv = vcom_session_getsockname (vsock->sid, &ep);
+ if (rv == 0)
+ {
+ if (ep.vrf == VPPCOM_VRF_DEFAULT)
+ {
+ __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6;
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ ((struct sockaddr_in *) __addr)->sin_port = ep.port;
+ *__len = sizeof (struct sockaddr_in);
+ break;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
+ *__len = sizeof (struct sockaddr_in6);
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ vppcom_endpt_t ep;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+
+ ep.vrf = VPPCOM_VRF_DEFAULT;
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ ep.is_ip4 = VPPCOM_IS_IP4;
+ ep.ip =
+ (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr;
+ ep.port =
+ (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port;
+ break;
+
+ case AF_INET6:
+ ep.is_ip4 = VPPCOM_IS_IP6;
+ ep.ip =
+ (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr;
+ ep.port =
+ (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port;
+ break;
+
+ default:
+ return -1;
+ break;
+ }
+
+ rv = vppcom_session_connect (vsock->sid, &ep);
+ }
+ return rv;
+}
+
+static inline int
+vcom_session_getpeername (int sid, vppcom_endpt_t * ep)
+{
+ int rv;
+ uint32_t size = sizeof (*ep);
+
+ rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_PEER_ADDR, ep, &size);
+ return rv;
+}
+
+int
+vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (!__addr || !__len)
+ return -EFAULT;
+
+ vppcom_endpt_t ep;
+ ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr;
+ rv = vcom_session_getpeername (vsock->sid, &ep);
+ if (rv == 0)
+ {
+ if (ep.vrf == VPPCOM_VRF_DEFAULT)
+ {
+ __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6;
+ switch (__addr->sa_family)
+ {
+ case AF_INET:
+ ((struct sockaddr_in *) __addr)->sin_port = ep.port;
+ *__len = sizeof (struct sockaddr_in);
+ break;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
+ *__len = sizeof (struct sockaddr_in6);
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return rv;
+}
+
+ssize_t
+vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags)
+{
+ return vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0);
+}
+
+ssize_t
+vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags)
+{
+ int rv = -1;
+ rv = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0);
+ return rv;
+}
+
+/*
+ * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET),
+ * 0 otherwise
+ * */
+int
+vcom_socket_is_connection_mode_socket (int __fd)
+{
+ int rv = -1;
+ /* TBD define new vppcom api */
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ int type;
+ socklen_t optlen;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
+ {
+ optlen = sizeof (type);
+ rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &type, &optlen);
+ if (rv != 0)
+ {
+ return 0;
+ }
+ /* get socket type */
+ switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ {
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ return 1;
+ break;
+
+ default:
+ return 0;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static inline ssize_t
+vcom_session_sendto (int __sid, void *__buf, size_t __n,
+ int __flags, __CONST_SOCKADDR_ARG __addr,
+ socklen_t __addr_len)
+{
+ int rv = -1;
+ /* TBD add new vpp api */
+ /* TBD add flags parameter */
+ rv = vppcom_session_write (__sid, (void *) __buf, (int) __n);
+ return rv;
+}
+
+ssize_t
+vcom_socket_sendto (int __fd, const void *__buf, size_t __n,
+ int __flags, __CONST_SOCKADDR_ARG __addr,
+ socklen_t __addr_len)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ if (!__buf)
+ {
+ return -EINVAL;
+ }
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ {
+ return -EINVAL;
+ }
+
+ if (vcom_socket_is_connection_mode_socket (__fd))
+ {
+ /* ignore __addr and _addr_len */
+ /* and EISCONN may be returned when they are not NULL and 0 */
+ if ((__addr != NULL) || (__addr_len != 0))
+ {
+ return -EISCONN;
+ }
+ }
+ else
+ {
+ if (!__addr)
+ {
+ return -EDESTADDRREQ;
+ }
+ /* not a vppcom supported address family */
+ if ((__addr->sa_family != AF_INET) || (__addr->sa_family != AF_INET6))
+ {
+ return -EINVAL;
+ }
+ }
+
+ rv = vcom_session_sendto (vsock->sid, (void *) __buf, (int) __n,
+ __flags, __addr, __addr_len);
+ return rv;
+}
+
+static inline ssize_t
+vcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n,
+ int __flags, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len)
+{
+ int rv = -1;
+
+ /* TBD add flags parameter */
+ rv = vppcom_session_read (__sid, __buf, __n);
+ return rv;
+}
+
+ssize_t
+vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n,
+ int __flags, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ if (!__buf || !__addr || !__addr_len)
+ {
+ return -EINVAL;
+ }
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ {
+ return -EINVAL;
+ }
+
+ rv = vcom_session_recvfrom (vsock->sid, __buf, __n,
+ __flags, __addr, __addr_len);
+ return rv;
+}
+
+/* TBD: move it to vppcom */
+static inline ssize_t
+vcom_session_sendmsg (int __sid, const struct msghdr *__message, int __flags)
+{
+ int rv = -1;
+ /* rv = vppcom_session_write (__sid, (void *) __message->__buf,
+ (int)__n); */
+ return rv;
+}
+
+ssize_t
+vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vcom_socket_is_connection_mode_socket (__fd))
+ {
+ /* ignore __addr and _addr_len */
+ /* and EISCONN may be returned when they are not NULL and 0 */
+ if ((__message->msg_name != NULL) || (__message->msg_namelen != 0))
+ {
+ return -EISCONN;
+ }
+ }
+ else
+ {
+ /* TBD: validate __message->msg_name and __message->msg_namelen
+ * and return -EINVAL on validation error
+ * */
+ ;
+ }
+
+ rv = vcom_session_sendmsg (vsock->sid, __message, __flags);
+
+ return rv;
+}
+
+#ifdef __USE_GNU
+int
+vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags)
+{
+
+ /* TBD: define a new vppcom api */
+ return 0;
+}
+#endif
+
+/* TBD: move it to vppcom */
+static inline ssize_t
+vcom_session_recvmsg (int __sid, struct msghdr *__message, int __flags)
+{
+ int rv = -1;
+ /* rv = vppcom_session_read (__sid, (void *) __message->__buf,
+ (int)__n); */
+ rv = -EOPNOTSUPP;
+ return rv;
+}
+
+ssize_t
+vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (!__message)
+ {
+ return -EINVAL;
+ }
+
+ /* validate __flags */
+
+ rv = vcom_session_recvmsg (vsock->sid, __message, __flags);
+ return rv;
+}
+
+#ifdef __USE_GNU
+int
+vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags,
+ struct timespec *__tmo)
+{
+ /* TBD: define a new vppcom api */
+ return 0;
+}
+#endif
+
+/* TBD: move it to vppcom */
+static inline int
+vcom_session_get_sockopt (int __sid, int __level, int __optname,
+ void *__restrict __optval,
+ socklen_t * __restrict __optlen)
+{
+ /* 1. for socket level options that are NOT socket attributes
+ * and that has corresponding vpp options get from vppcom */
+ return 0;
+}
+
+int
+vcom_socket_getsockopt (int __fd, int __level, int __optname,
+ void *__restrict __optval,
+ socklen_t * __restrict __optlen)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ if (!__optval && !__optlen)
+ return -EFAULT;
+
+ switch (__level)
+ {
+ /* handle options at socket level */
+ case SOL_SOCKET:
+ switch (__optname)
+ {
+/*
+ * 1. for socket level options that are socket attributes,
+ * get from libc_getsockopt.
+ * 2. for socket level options that are NOT socket
+ * attributes and that has corresponding vpp options
+ * get from vppcom.
+ * 3. for socket level options unimplemented
+ * return -ENOPROTOOPT */
+ case SO_DEBUG:
+ case SO_DONTROUTE:
+ case SO_BROADCAST:
+ case SO_SNDBUF:
+ case SO_RCVBUF:
+ case SO_REUSEADDR:
+ case SO_REUSEPORT:
+ case SO_KEEPALIVE:
+ case SO_TYPE:
+ case SO_PROTOCOL:
+ case SO_DOMAIN:
+ case SO_ERROR:
+ case SO_OOBINLINE:
+ case SO_NO_CHECK:
+ case SO_PRIORITY:
+ case SO_LINGER:
+ case SO_BSDCOMPAT:
+ case SO_TIMESTAMP:
+ case SO_TIMESTAMPNS:
+ case SO_TIMESTAMPING:
+ case SO_RCVTIMEO:
+ case SO_SNDTIMEO:
+ case SO_RCVLOWAT:
+ case SO_SNDLOWAT:
+ case SO_PASSCRED:
+ case SO_PEERCRED:
+ case SO_PEERNAME:
+ case SO_ACCEPTCONN:
+ case SO_PASSSEC:
+ case SO_PEERSEC:
+ case SO_MARK:
+ case SO_RXQ_OVFL:
+ case SO_WIFI_STATUS:
+ case SO_PEEK_OFF:
+ case SO_NOFCS:
+ case SO_BINDTODEVICE:
+ case SO_GET_FILTER:
+ case SO_LOCK_FILTER:
+ case SO_BPF_EXTENSIONS:
+ case SO_SELECT_ERR_QUEUE:
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+#endif
+ case SO_MAX_PACING_RATE:
+#ifdef SO_INCOMING_CPU
+ case SO_INCOMING_CPU:
+#endif
+ rv = libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
+ if (rv != 0)
+ {
+ rv = -errno;
+ return rv;
+ }
+ break;
+
+ default:
+ /* We implement the SO_SNDLOWAT etc to not be settable
+ * (1003.1g 7).
+ */
+ return -ENOPROTOOPT;
+ }
+
+ break;
+
+ default:
+ /* 1. handle options that are NOT socket level options,
+ * but have corresponding vpp otions. */
+ rv = vcom_session_get_sockopt (vsock->sid, __level, __optname,
+ __optval, __optlen);
+ break;
+ }
+
+ return rv;
+}
+
+/* TBD: move it to vppcom */
+static inline int
+vcom_session_setsockopt (int __sid, int __level, int __optname,
+ const void *__optval, socklen_t __optlen)
+{
+ int rv = -EOPNOTSUPP;
+
+ switch (__level)
+ {
+ case SOL_TCP:
+ switch (__optname)
+ {
+ case TCP_KEEPIDLE:
+ rv =
+ vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPIDLE, 0, 0);
+ break;
+ case TCP_KEEPINTVL:
+ rv =
+ vppcom_session_attr (__sid, VPPCOM_ATTR_SET_TCP_KEEPINTVL, 0, 0);
+ break;
+ default:
+ break;
+ }
+ break;
+ case SOL_IPV6:
+ switch (__optname)
+ {
+ case IPV6_V6ONLY:
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_V6ONLY, 0, 0);
+ break;
+ default:
+ break;
+ }
+ break;
+ case SOL_SOCKET:
+ switch (__optname)
+ {
+ case SO_KEEPALIVE:
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_KEEPALIVE, 0, 0);
+ break;
+ case SO_REUSEADDR:
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_REUSEADDR, 0, 0);
+ break;
+ case SO_BROADCAST:
+ rv = vppcom_session_attr (__sid, VPPCOM_ATTR_SET_BROADCAST, 0, 0);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_setsockopt (int __fd, int __level, int __optname,
+ const void *__optval, socklen_t __optlen)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (!p)
+ return -EBADF;
+
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ if (!vsock)
+ return -ENOTSOCK;
+
+ if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND)
+ return -EINVAL;
+
+ /*
+ * Options without arguments
+ */
+
+ if (__optname == SO_BINDTODEVICE)
+ {
+ rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
+ if (rv != 0)
+ {
+ rv = -errno;
+ }
+ return rv;
+ }
+
+ if (!__optval)
+ return -EFAULT;
+
+ if (__optlen < sizeof (int))
+ return -EINVAL;
+
+ switch (__level)
+ {
+ case SOL_IPV6:
+ switch (__optname)
+ {
+ case IPV6_V6ONLY:
+ rv = vcom_session_setsockopt (vsock->sid, __level, __optname,
+ __optval, __optlen);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ case SOL_TCP:
+ switch (__optname)
+ {
+ case TCP_NODELAY:
+ return 0;
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+ rv = vcom_session_setsockopt (vsock->sid, __level, __optname,
+ __optval, __optlen);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ break;
+ /* handle options at socket level */
+ case SOL_SOCKET:
+ switch (__optname)
+ {
+ case SO_REUSEADDR:
+ case SO_BROADCAST:
+ case SO_KEEPALIVE:
+ rv = vcom_session_setsockopt (vsock->sid, __level, __optname,
+ __optval, __optlen);
+ break;
+
+ /*
+ * 1. for socket level options that are socket attributes,
+ * set it from libc_getsockopt
+ * 2. for socket level options that are NOT socket
+ * attributes and that has corresponding vpp options
+ * set it from vppcom
+ * 3. for socket level options unimplemented
+ * return -ENOPROTOOPT */
+ case SO_DEBUG:
+ case SO_DONTROUTE:
+ case SO_SNDBUF:
+ case SO_RCVBUF:
+ case SO_REUSEPORT:
+ case SO_TYPE:
+ case SO_PROTOCOL:
+ case SO_DOMAIN:
+ case SO_ERROR:
+ case SO_OOBINLINE:
+ case SO_NO_CHECK:
+ case SO_PRIORITY:
+ case SO_LINGER:
+ case SO_BSDCOMPAT:
+ case SO_TIMESTAMP:
+ case SO_TIMESTAMPNS:
+ case SO_TIMESTAMPING:
+ case SO_RCVTIMEO:
+ case SO_SNDTIMEO:
+ case SO_RCVLOWAT:
+ case SO_SNDLOWAT:
+ case SO_PASSCRED:
+ case SO_PEERCRED:
+ case SO_PEERNAME:
+ case SO_ACCEPTCONN:
+ case SO_PASSSEC:
+ case SO_PEERSEC:
+ case SO_MARK:
+ case SO_RXQ_OVFL:
+ case SO_WIFI_STATUS:
+ case SO_PEEK_OFF:
+ case SO_NOFCS:
+ /*
+ * SO_BINDTODEVICE already handled as
+ * "Options without arguments" */
+ /* case SO_BINDTODEVICE: */
+ case SO_GET_FILTER:
+ case SO_LOCK_FILTER:
+ case SO_BPF_EXTENSIONS:
+ case SO_SELECT_ERR_QUEUE:
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+#endif
+ case SO_MAX_PACING_RATE:
+#ifdef SO_INCOMING_CPU
+ case SO_INCOMING_CPU:
+#endif
+ rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
+ if (rv != 0)
+ {
+ rv = -errno;
+ return rv;
+ }
+ break;
+
+ default:
+ /* We implement the SO_SNDLOWAT etc to not be settable
+ * (1003.1g 7).
+ */
+ return -ENOPROTOOPT;
+ }
+
+ break;
+
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_listen (int __fd, int __n)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+
+ /* TBD vppcom to accept __n parameter */
+ rv = vppcom_session_listen (vsock->sid, __n);
+ }
+
+ return rv;
+}
+
+static int
+vcom_socket_connected_socket (int __fd, int __sid,
+ int *__domain,
+ int *__type, int *__protocol, int flags)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ vcom_socket_t *vsock;
+
+ i32 fd;
+ i32 sockidx;
+
+ socklen_t optlen;
+
+ optlen = sizeof (*__domain);
+ rv = libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &optlen);
+ if (rv != 0)
+ {
+ rv = -errno;
+ goto out;
+ }
+
+ optlen = sizeof (*__type);
+ rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &optlen);
+ if (rv != 0)
+ {
+ rv = -errno;
+ goto out;
+ }
+
+ optlen = sizeof (*__protocol);
+ rv = libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &optlen);
+ if (rv != 0)
+ {
+ rv = -errno;
+ goto out;
+ }
+
+ fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol);
+ if (fd < 0)
+ {
+ rv = fd;
+ goto out;
+ }
+
+ pool_get (vsm->vsockets, vsock);
+ vsocket_init (vsock);
+
+ sockidx = vsock - vsm->vsockets;
+ hash_set (vsm->sockidx_by_fd, fd, sockidx);
+
+ vsocket_set (vsock, fd, __sid, SOCKET_TYPE_VPPCOM_BOUND);
+ return fd;
+
+out:
+ return rv;
+}
+
+/* If flag is 0, then accept4() is the same as accept().
+ * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags
+ */
+static int
+vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int flags)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ int fd;
+ int sid;
+ int domain;
+ int type;
+ int protocol;
+
+ uint8_t addr8[sizeof (struct in6_addr)];
+ vppcom_endpt_t ep;
+
+ ep.ip = addr8;
+
+ /* validate flags */
+
+ /*
+ * for documentation
+ * switch (flags)
+ * {
+ * case 0:
+ * case SOCK_NONBLOCK:
+ * case SOCK_CLOEXEC:
+ * case SOCK_NONBLOCK | SOCK_CLOEXEC:
+ * break;
+ *
+ * default:
+ * return -1;
+ * }
+ */
+ /* flags can be 0 or can be bitwise OR
+ * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */
+
+ if (!(!flags || (flags & (SOCK_NONBLOCK | SOCK_CLOEXEC))))
+ {
+ /* TBD: return proper error code */
+ return -1;
+ }
+
+ /* TBD: return proper error code */
+
+ if (!vcom_socket_is_connection_mode_socket (__fd))
+ {
+ return -EOPNOTSUPP;
+ }
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+
+
+ rv = vcom_fcntl (vsock->fd, F_GETFL, 0);
+ if (rv < 0)
+ {
+ return rv;
+ }
+
+ /* is blocking */
+ if (!(rv & O_NONBLOCK))
+ {
+ /* socket is not marked as nonblocking
+ * and no pending connections are present
+ * on the queue, accept () blocks the caller
+ * until a connection is present.
+ */
+ rv = vppcom_session_accept (vsock->sid, &ep,
+ -1.0 /* wait forever */ );
+ }
+ else
+ {
+ /* The file descriptor refers to a socket and has been
+ * marked nonblocking(O_NONBLOCK) and the accept would
+ * block.
+ * */
+ /* is non blocking */
+ rv = vppcom_session_accept (vsock->sid, &ep, 0);
+ /* If the socket is marked nonblocking and
+ * no pending connections are present on the
+ * queue, accept fails with the error
+ * EAGAIN or EWOULDBLOCK
+ */
+ if (rv == VPPCOM_ETIMEDOUT)
+ {
+ rv = VPPCOM_EAGAIN;
+ }
+ }
+ if (rv < 0)
+ {
+ return rv;
+ }
+
+ sid = rv;
+
+ /* create a new connected socket resource and set flags
+ * on the new file descriptor.
+ * update vsockets and sockidx_by_fd table
+ * */
+ fd = vcom_socket_connected_socket (__fd, sid,
+ &domain, &type, &protocol, flags);
+ if (fd < 0)
+ {
+ return fd;
+ }
+
+ rv = fd;
+
+ /* TBD populate __addr and __addr_len */
+ /* TBD: The returned address is truncated if the buffer
+ * provided is too small, in this case, __addr_len will
+ * return a value greater than was supplied to the call.*/
+ if (__addr)
+ {
+ if (ep.is_cut_thru)
+ {
+ /* TBD populate __addr and __addr_len */
+ switch (domain)
+ {
+ case AF_INET:
+ ((struct sockaddr_in *) __addr)->sin_family = AF_INET;
+ ((struct sockaddr_in *) __addr)->sin_port = ep.port;
+ memcpy (&((struct sockaddr_in *) __addr)->sin_addr,
+ addr8, sizeof (struct in_addr));
+ /* TBD: populate __addr_len */
+ if (__addr_len)
+ {
+ *__addr_len = sizeof (struct sockaddr_in);
+ }
+ break;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
+ memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr.
+ __in6_u.__u6_addr8, addr8,
+ sizeof (struct in6_addr));
+ /* TBD: populate __addr_len */
+ if (__addr_len)
+ {
+ *__addr_len = sizeof (struct sockaddr_in6);
+ }
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+ else
+ {
+ switch (ep.is_ip4)
+ {
+ case VPPCOM_IS_IP4:
+ ((struct sockaddr_in *) __addr)->sin_family = AF_INET;
+ ((struct sockaddr_in *) __addr)->sin_port = ep.port;
+ memcpy (&((struct sockaddr_in *) __addr)->sin_addr,
+ addr8, sizeof (struct in_addr));
+ /* TBD: populate __addr_len */
+ if (__addr_len)
+ {
+ *__addr_len = sizeof (struct sockaddr_in);
+ }
+ break;
+
+ case VPPCOM_IS_IP6:
+ ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port;
+ memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr.
+ __in6_u.__u6_addr8, addr8,
+ sizeof (struct in6_addr));
+ /* TBD: populate __addr_len */
+ if (__addr_len)
+ {
+ *__addr_len = sizeof (struct sockaddr_in6);
+ }
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+ }
+ }
+ else
+ {
+ /* when __addr is NULL, nothing is filled in,
+ * in this case, __addr_len is not used,
+ * and should also be null
+ * */
+ if (__addr_len)
+ {
+ /* TBD: return proper error code */
+ return -1;
+ }
+ }
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len)
+{
+ /* set flags to 0 for accept() */
+ return vcom_socket_accept_flags (__fd, __addr, __addr_len, 0);
+}
+
+#ifdef __USE_GNU
+int
+vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int __flags)
+{
+ /* SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags */
+ return vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags);
+}
+#endif
+
+/* TBD: move it to vppcom */
+static inline int
+vcom_session_shutdown (int __fd, int __how)
+{
+ return 0;
+}
+
+int
+vcom_socket_shutdown (int __fd, int __how)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ uword *p;
+ vcom_socket_t *vsock;
+
+ p = hash_get (vsm->sockidx_by_fd, __fd);
+ if (p)
+ {
+ vsock = pool_elt_at_index (vsm->vsockets, p[0]);
+ switch (__how)
+ {
+ case SHUT_RD:
+ case SHUT_WR:
+ case SHUT_RDWR:
+ rv = vcom_session_shutdown (vsock->sid, __how);
+ return rv;
+ break;
+
+ default:
+ return -EINVAL;
+ break;
+ }
+ }
+
+ return rv;
+}
+
+int
+vcom_socket_epoll_create1 (int __flags)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ vcom_epoll_t *vepoll;
+
+ i32 epfd;
+ i32 vep_idx;
+ i32 epollidx;
+
+ epfd = vcom_socket_open_epoll (__flags);
+ if (epfd < 0)
+ {
+ rv = epfd;
+ goto out;
+ }
+
+ vep_idx = vppcom_epoll_create ();
+ if (vep_idx < 0)
+ {
+ rv = vep_idx;
+ goto out_close_epoll;
+ }
+
+ pool_get (vsm->vepolls, vepoll);
+ vepoll_init (vepoll);
+
+ epollidx = vepoll - vsm->vepolls;
+ hash_set (vsm->epollidx_by_epfd, epfd, epollidx);
+
+ vepoll_set (vepoll, epfd, vep_idx, EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0);
+
+ return epfd;
+
+out_close_epoll:
+ vcom_socket_close_epoll (epfd);
+out:
+ return rv;
+}
+
+/*
+ * PRE: vppcom_epoll_ctl() is successful
+ * free_vepitem_on_del : 0 - no_pool_put, 1 - pool_put
+ */
+int
+vcom_socket_ctl_vepitem (int __epfd, int __op, int __fd,
+ struct epoll_event *__event,
+ i32 vep_idx, vcom_epoll_t * vepoll,
+ i32 vfd_id, void *vfd, vcom_fd_type_t type,
+ int free_vepitem_on_del)
+{
+ int rv = -1;
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ vcom_epitem_t *vepitem;
+
+ vcom_epitem_key_t epfdfd = {.epfd = __epfd,.fd = __fd };
+ uword *p;
+ i32 vepitemidx;
+
+ i32 *vepitemidxs = 0;
+
+ struct epoll_event revent = {.events = 0,.data.fd = INVALID_FD };
+
+ i32 vec_idx;
+
+ /* perform control operations on the epoll instance */
+ switch (__op)
+ {
+ case EPOLL_CTL_ADD:
+ /*
+ * supplied file descriptor is already
+ * registered with this epoll instance
+ * */
+ /* vepitem exists */
+ p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key);
+ if (p)
+ {
+ rv = -EEXIST;
+ goto out;
+ }
+
+ /* add a new vepitem */
+ pool_get (vsm->vepitems, vepitem);
+ vepitem_init (vepitem);
+
+ vepitemidx = vepitem - vsm->vepitems;
+ hash_set (vsm->epitemidx_by_epfdfd, epfdfd.key, vepitemidx);
+ vepitem_set (vepitem, __epfd, __fd, __fd, __fd, type, *__event, revent);
+
+ /* update epitemidxs */
+ /* by_epfd */
+ p = hash_get (vsm->epitemidxs_by_epfd, __epfd);
+ if (!p) /* not exist */
+ {
+ vepitemidxs = 0;
+ vec_add1 (vepitemidxs, vepitemidx);
+ hash_set (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs);
+ }
+ else /* exists */
+ {
+ vepitemidxs = *(i32 **) p;
+ vec_add1 (vepitemidxs, vepitemidx);
+ hash_set3 (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs, 0);
+ }
+ /* update epitemidxs */
+ /* by_fd */
+ p = hash_get (vsm->epitemidxs_by_fd, __fd);
+ if (!p) /* not exist */
+ {
+ vepitemidxs = 0;
+ vec_add1 (vepitemidxs, vepitemidx);
+ hash_set (vsm->epitemidxs_by_fd, __fd, vepitemidxs);
+ }
+ else /* exists */
+ {
+ vepitemidxs = *(i32 **) p;
+ vec_add1 (vepitemidxs, vepitemidx);
+ hash_set3 (vsm->epitemidxs_by_fd, __fd, vepitemidxs, 0);
+ }
+
+ /* increment vepoll fd count by 1 */
+ vepoll->count += 1;
+
+ rv = 0;
+ goto out;
+ break;
+
+ case EPOLL_CTL_MOD:
+ /*
+ * supplied file descriptor is not
+ * registered with this epoll instance
+ * */
+ /* vepitem not exist */
+ p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key);
+ if (!p)
+ {
+ rv = -ENOENT;
+ goto out;
+ }
+ vepitem = pool_elt_at_index (vsm->vepitems, p[0]);
+ if (vepitem)
+ {
+ vepitem->event = *__event;
+ vepitem->revent = revent;
+ }
+
+ rv = 0;
+ goto out;
+ break;
+
+ case EPOLL_CTL_DEL:
+ /*
+ * supplied file descriptor is not
+ * registered with this epoll instance
+ * */
+ /* vepitem not exist */
+ p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key);
+ if (!p)
+ {
+ rv = -ENOENT;
+ goto out;
+ }
+ vepitemidx = *(i32 *) p;
+ hash_unset (vsm->epitemidx_by_epfdfd, epfdfd.key);
+
+ /* update epitemidxs */
+ /* by_epfd */
+ p = hash_get (vsm->epitemidxs_by_epfd, __epfd);
+ if (!p) /* not exist */
+ {
+ rv = -ENOENT;
+ goto out;
+ }
+ else /* exists */
+ {
+ vepitemidxs = *(i32 **) p;
+ vec_idx = vec_search (vepitemidxs, vepitemidx);
+ if (vec_idx != ~0)
+ {
+ vec_del1 (vepitemidxs, vec_idx);
+ if (!vec_len (vepitemidxs))
+ {
+ vec_free (vepitemidxs);
+ hash_unset (vsm->epitemidxs_by_epfd, __epfd);
+ }
+ }
+ }
+
+ /* update epitemidxs */
+ /* by_fd */
+ p = hash_get (vsm->epitemidxs_by_fd, __fd);
+ if (!p) /* not exist */
+ {
+ rv = -ENOENT;
+ goto out;
+ }
+ else /* exists */
+ {
+ vepitemidxs = *(i32 **) p;
+ vec_idx = vec_search (vepitemidxs, vepitemidx);
+ if (vec_idx != ~0)
+ {
+ vec_del1 (vepitemidxs, vec_idx);
+ if (!vec_len (vepitemidxs))
+ {
+ vec_free (vepitemidxs);
+ hash_unset (vsm->epitemidxs_by_fd, __fd);
+ }
+ }
+ }
+
+ /* pool put vepitem */
+ vepitem = pool_elt_at_index (vsm->vepitems, vepitemidx);
+ if (free_vepitem_on_del)
+ {
+ if (!vepitem)
+ {
+ rv = -ENOENT;
+ goto out;
+ }
+ vepitem_init (vepitem);
+ pool_put (vsm->vepitems, vepitem);
+ }
+ else
+ {
+ if (!vepitem)
+ {
+ vepitem_init (vepitem);
+ }
+ }
+
+ /* decrement vepoll fd count by 1 */
+ vepoll->count -= 1;
+
+ rv = 0;
+ goto out;
+ break;
+
+ default:
+ rv = -EINVAL;
+ goto out;
+ break;
+ }
+
+out:
+ return rv;
+}
+
+/*
+ * PRE: 00. null pointer check on __event
+ * 01. all other parameters are validated
+ */
+
+static int
+vcom_socket_epoll_ctl_internal (int __epfd, int __op, int __fd,
+ struct epoll_event *__event,
+ int free_vepitem_on_del)
+{
+ int rv = -1;
+
+ /* vcom_socket_main_t *vsm = &vcom_socket_main; */
+ vcom_epoll_t *vepoll;
+
+ /*__fd could could be vcom socket or vcom epoll or kernel fd */
+ void *vfd;
+ vcom_epoll_t *vfd_vepoll;
+ vcom_socket_t *vfd_vsock;
+
+ i32 vep_idx;
+ i32 vfd_id;
+
+ vcom_fd_type_t type = FD_TYPE_INVALID;
+
+ /* validate __event */
+
+ /* get vep_idx and vepoll */
+ vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll);
+ if (vep_idx == INVALID_VEP_IDX)
+ {
+ return -EBADF;
+ }
+
+ /* get vcom fd type, vfd_id and vfd */
+ vfd_id = vcom_socket_get_sid_and_vsock (__fd, &vfd_vsock);
+ if (vfd_id != INVALID_SESSION_ID)
+ {
+ type = FD_TYPE_VCOM_SOCKET;
+ vfd = vfd_vsock;
+ }
+ else if ((vfd_id = vcom_socket_get_vep_idx_and_vepoll (__fd, &vfd_vepoll))
+ != INVALID_VEP_IDX)
+ {
+ type = FD_TYPE_EPOLL;
+ vfd = vfd_vepoll;
+ }
+ else
+ {
+ /* FD_TYPE_KERNEL not supported by epoll instance */
+ type = FD_TYPE_INVALID;
+ return -EBADF;
+ }
+
+
+ /* vepoll and vsock are now valid */
+ rv = vppcom_epoll_ctl (vep_idx, __op, vfd_id, __event);
+ if (rv < 0)
+ {
+ return rv;
+ }
+
+ rv = vcom_socket_ctl_vepitem (__epfd, __op, __fd,
+ __event,
+ vep_idx, vepoll,
+ vfd_id, vfd, type, free_vepitem_on_del);
+ return rv;
+}
+
+int
+vcom_socket_epoll_ctl (int __epfd, int __op, int __fd,
+ struct epoll_event *__event)
+{
+ int rv = -1;
+
+ rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 1);
+ return rv;
+}
+
+static int
+vcom_socket_epoll_ctl1 (int __epfd, int __op, int __fd,
+ struct epoll_event *__event)
+{
+ int rv = -1;
+
+ rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 0);
+ return rv;
+}
+
+int
+vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout,
+ const __sigset_t * __ss)
+{
+ int rv = -EBADF;
+
+ /* in seconds eg. 3.123456789 seconds */
+ double time_to_wait = (double) 0;
+
+ i32 vep_idx;
+
+ /* validate __event */
+ if (!__events)
+ {
+ rv = -EFAULT;
+ goto out;
+ }
+
+ /* validate __timeout */
+ if (__timeout > 0)
+ {
+ time_to_wait = (double) __timeout / (double) 1000;
+ }
+ else if (__timeout == 0)
+ {
+ time_to_wait = (double) 0;
+ }
+ else if (__timeout == -1)
+ {
+ time_to_wait = ~0;
+ }
+ else
+ {
+ rv = -EBADF;
+ goto out;
+ }
+
+ /* get vep_idx */
+ vep_idx = vcom_socket_get_vep_idx (__epfd);
+ if (vep_idx != INVALID_VEP_IDX)
+ {
+ rv = vppcom_epoll_wait (vep_idx, __events, __maxevents, time_to_wait);
+ }
+out:
+ return rv;
+}
+
+static inline void
+vcom_pollfds_2_selectfds (
+ /* src */
+ struct pollfd *__fds, nfds_t __nfds,
+ /* dest */
+ int vcom_nfds,
+ fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds)
+{
+ nfds_t fds_idx = 0;
+
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /* for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL */
+ FD_SET (__fds[fds_idx].fd, vcom_exceptfds);
+
+ /* requested events */
+ if (__fds[fds_idx].events)
+ {
+ if (__fds[fds_idx].events & POLLIN)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_readfds);
+ }
+ if (__fds[fds_idx].events & POLLPRI)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_readfds);
+ }
+ if (__fds[fds_idx].events & POLLOUT)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_writefds);
+ }
+#if defined __USE_XOPEN || defined __USE_XOPEN2K8
+ if (__fds[fds_idx].events & POLLRDNORM)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_readfds);
+ }
+ if (__fds[fds_idx].events & POLLRDBAND)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_readfds);
+ }
+ if (__fds[fds_idx].events & POLLWRNORM)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_writefds);
+ }
+ if (__fds[fds_idx].events & POLLWRBAND)
+ {
+ FD_SET (__fds[fds_idx].fd, vcom_writefds);
+ }
+#endif
+ }
+ } /* for (fds_idx = 0; fds_idx < __nfds; fds_idx++) */
+}
+
+static inline void
+vcom_selectfds_2_pollfds (
+ /* dest */
+ struct pollfd *__fds, nfds_t __nfds, int *nfd,
+ /* src */
+ int vcom_nfds,
+ fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds)
+{
+ nfds_t fds_idx = 0;
+
+
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ __fds[fds_idx].revents = 0;
+ }
+
+ /* for POLLRDHUP, POLLERR, POLLHUP and POLLNVAL */
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_exceptfds))
+ {
+ /*
+ * TBD: for now any select exception
+ * is flagged as POLLERR
+ * */
+ __fds[fds_idx].revents |= POLLERR;
+ }
+
+ /* requested events */
+ if (__fds[fds_idx].events & POLLIN)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
+ {
+ __fds[fds_idx].revents |= POLLIN;
+ }
+ }
+ if (__fds[fds_idx].events & POLLPRI)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
+ {
+ __fds[fds_idx].revents |= POLLIN;
+ }
+ }
+ if (__fds[fds_idx].events & POLLOUT)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds))
+ {
+ __fds[fds_idx].revents |= POLLOUT;
+ }
+ }
+#if defined __USE_XOPEN || defined __USE_XOPEN2K8
+ if (__fds[fds_idx].events & POLLRDNORM)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
+ {
+ __fds[fds_idx].revents |= POLLRDNORM;
+ }
+ }
+ if (__fds[fds_idx].events & POLLRDBAND)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_readfds))
+ {
+ __fds[fds_idx].revents |= POLLRDBAND;
+ }
+ }
+ if (__fds[fds_idx].events & POLLWRNORM)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds))
+ {
+ __fds[fds_idx].revents |= POLLWRNORM;
+ }
+ }
+ if (__fds[fds_idx].events & POLLWRBAND)
+ {
+ if (FD_ISSET (__fds[fds_idx].fd, vcom_writefds))
+ {
+ __fds[fds_idx].revents |= POLLWRBAND;
+ }
+ }
+#endif
+ } /* for (fds_idx = 0; fds_idx < __nfds; fds_idx++) */
+
+ /*
+ * nfd:
+ * the number of structures which have nonzero revents fields
+ * (in other words, those descriptors with events or
+ * errors reported)
+ * */
+ *nfd = 0;
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ if (__fds[fds_idx].revents)
+ {
+ (*nfd)++;
+ }
+ }
+}
+
+/*
+ * PRE: parameters are validated,
+ * vcom_socket_poll is always called with __timeout set to zero
+ * hence returns immediately
+ *
+ * ACTION: handle non negative validated vcom fds and ignore rest
+ */
+
+/*
+ * implements vcom_socket_poll () interface
+ *
+ * internally uses vcom_socket_select ()
+ * to realize the behavior
+ * */
+int
+vcom_socket_poll_select_impl (struct pollfd *__fds, nfds_t __nfds,
+ int __timeout)
+{
+ int rv;
+ pid_t pid = getpid ();
+
+ nfds_t fds_idx = 0;
+ int nfd = 0;
+
+ /* vcom */
+ int vcom_nfds = 0;
+ fd_set vcom_readfds;
+ fd_set vcom_writefds;
+ fd_set vcom_exceptfds;
+ int vcom_nfd = -1;
+ /* invalid max_vcom_fd is -1 */
+ int max_vcom_fd = -1;
+
+ /* __timeout is zero to get ready events and return immediately */
+ struct timeval tv = {.tv_sec = 0,.tv_usec = 0 };
+
+ /* validate __nfds from select perspective */
+ if (__nfds > FD_SETSIZE)
+ {
+ rv = -EINVAL;
+ goto poll_done;
+ }
+
+ /* zero vcom fd sets */
+ /*
+ * V vcom fd set
+ */
+#define _(V) \
+ FD_ZERO ((V))
+
+ _(&vcom_readfds);
+ _(&vcom_writefds);
+ _(&vcom_exceptfds);
+#undef _
+
+ vcom_nfds = 0;
+ vcom_nfd = -1;
+
+
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /* non negative validated vcom fds */
+ if (__fds[fds_idx].fd > FD_SETSIZE)
+ {
+ rv = -EINVAL;
+ goto poll_done;
+ }
+
+ /* max_vcom_fd and vcom_nfd */
+ if (__fds[fds_idx].fd > max_vcom_fd)
+ {
+ /* requested events */
+ if (__fds[fds_idx].events)
+ {
+ max_vcom_fd = __fds[fds_idx].fd;
+ }
+ }
+ ++vcom_nfd;
+ }
+
+ vcom_nfds = max_vcom_fd != -1 ? max_vcom_fd + 1 : 0;
+
+ if (!vcom_nfds)
+ {
+ rv = vcom_nfds;
+ goto poll_done;
+ }
+
+ vcom_pollfds_2_selectfds (
+ /* src */
+ __fds, __nfds,
+ /* dest */
+ vcom_nfds,
+ &vcom_readfds, &vcom_writefds, &vcom_exceptfds);
+
+ /* select on vcom fds */
+ vcom_nfd = vcom_socket_select (vcom_nfds,
+ &vcom_readfds,
+ &vcom_writefds, &vcom_exceptfds, &tv);
+ if (VCOM_DEBUG > 2)
+ fprintf (stderr,
+ "[%d] vcom_socket_select: "
+ "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds);
+
+ if (vcom_nfd < 0)
+ {
+ rv = vcom_nfd;
+ goto poll_done;
+ }
+
+ vcom_selectfds_2_pollfds (
+ /* dest */
+ __fds, __nfds, &nfd,
+ /* src */
+ vcom_nfds,
+ &vcom_readfds, &vcom_writefds, &vcom_exceptfds);
+
+ rv = nfd;
+
+poll_done:
+ return rv;
+}
+
+/*
+ * TBD: remove this static function once vppcom
+ * has an implementation in place
+ *
+ * ACTION:
+ */
+static int
+vppcom_poll (struct pollfd *__fds, nfds_t __nfds, double time_to_wait)
+{
+ return -EOPNOTSUPP;
+}
+
+int
+vcom_socket_poll_vppcom_impl (struct pollfd *__fds, nfds_t __nfds,
+ int __timeout)
+{
+ nfds_t fds_idx = 0;
+
+ /* in seconds eg. 3.123456789 seconds */
+ double time_to_wait = (double) 0;
+
+ i32 sid;
+ i32 vep_idx;
+
+ /* replace vcom fd with session idx */
+ for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
+ {
+ /* ignore negative fds */
+ if (__fds[fds_idx].fd < 0)
+ {
+ continue;
+ }
+
+ /* non negative validated vcom fds */
+ sid = vcom_socket_get_sid (__fds[fds_idx].fd);
+ if (sid != INVALID_SESSION_ID)
+ {
+ __fds[fds_idx].fd = sid;
+ }
+ else
+ {
+ /* get vep_idx */
+ vep_idx = vcom_socket_get_vep_idx (__fds[fds_idx].fd);
+ if (vep_idx != INVALID_VEP_IDX)
+ {
+ __fds[fds_idx].fd = vep_idx;
+ }
+ else
+ {
+ return -EBADF;
+ }
+ }
+ }
+
+ /* validate __timeout */
+ if (__timeout > 0)
+ {
+ time_to_wait = (double) __timeout / (double) 1000;
+ }
+ else if (__timeout == 0)
+ {
+ time_to_wait = (double) 0;
+ }
+ else if (__timeout < 0)
+ {
+ time_to_wait = ~0;
+ }
+ else
+ {
+ return -EBADF;
+ }
+
+ return vppcom_poll (__fds, __nfds, time_to_wait);
+}
+
+int
+vcom_socket_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
+{
+ /* select an implementation */
+
+ /* return vcom_socket_poll_vppcom_impl (__fds, __nfds, __timeout); */
+ return vcom_socket_poll_select_impl (__fds, __nfds, __timeout);
+}
+
+#ifdef __USE_GNU
+int
+vcom_socket_ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+int
+vcom_socket_main_init (void)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+
+ if (VCOM_DEBUG > 0)
+ printf ("vcom_socket_main_init\n");
+
+ if (!vsm->init)
+ {
+ /* TBD: define FD_MAXSIZE and use it here */
+ pool_alloc (vsm->vsockets, FD_SETSIZE);
+ vsm->sockidx_by_fd = hash_create (0, sizeof (i32));
+
+ pool_alloc (vsm->vepolls, FD_SETSIZE);
+ vsm->epollidx_by_epfd = hash_create (0, sizeof (i32));
+
+ pool_alloc (vsm->vepitems, FD_SETSIZE);
+ vsm->epitemidx_by_epfdfd = hash_create (0, sizeof (i32));
+
+ vsm->epitemidxs_by_epfd = hash_create (0, sizeof (i32 *));
+ vsm->epitemidxs_by_fd = hash_create (0, sizeof (i32 *));
+
+ vsm->init = 1;
+ }
+
+ return 0;
+}
+
+
+void
+vcom_socket_main_show (void)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ vcom_socket_t *vsock;
+
+ vcom_epoll_t *vepoll;
+
+ vcom_epitem_t *vepitem;
+
+ i32 epfd;
+ i32 fd;
+ i32 *vepitemidxs, *vepitemidxs_var;
+
+ if (vsm->init)
+ {
+ /* from active list of vsockets show vsock */
+
+ /* *INDENT-OFF* */
+ pool_foreach (vsock, vsm->vsockets,
+ ({
+ printf(
+ "fd='%04d', sid='%08x',type='%-30s'\n",
+ vsock->fd, vsock->sid,
+ vcom_socket_type_str (vsock->type));
+ }));
+ /* *INDENT-ON* */
+
+ /* from active list of vepolls, show vepoll */
+
+ /* *INDENT-OFF* */
+ pool_foreach (vepoll, vsm->vepolls,
+ ({
+ printf(
+ "epfd='%04d', vep_idx='%08x', "
+ "type='%-30s', "
+ "flags='%d', count='%d', close='%d'\n",
+ vepoll->epfd, vepoll->vep_idx,
+ vcom_socket_epoll_type_str (vepoll->type),
+ vepoll->flags, vepoll->count, vepoll->close);
+ }));
+ /* *INDENT-ON* */
+
+ /* from active list of vepitems, show vepitem */
+
+ /* *INDENT-OFF* */
+ pool_foreach (vepitem, vsm->vepitems,
+ ({
+ printf(
+ "epfd='%04d', fd='%04d', "
+ "next_fd='%04d', prev_fd='%04d', "
+ "type='%-30s', "
+ "events='%04x', revents='%04x'\n",
+ vepitem->epfd, vepitem->fd,
+ vepitem->next_fd, vepitem->prev_fd,
+ vcom_socket_vcom_fd_type_str (vepitem->type),
+ vepitem->event.events, vepitem->revent.events);
+ }));
+
+ /* *INDENT-ON* */
+
+ /* show epitemidxs for epfd */
+ /* *INDENT-OFF* */
+ hash_foreach (epfd, vepitemidxs,
+ vsm->epitemidxs_by_epfd,
+ ({
+ printf("\n[ '%04d': ", epfd);
+ vec_foreach (vepitemidxs_var,vepitemidxs)
+ {
+ printf("'%04d' ", (int)vepitemidxs_var[0]);
+ }
+ printf("]\n");
+ }));
+ /* *INDENT-ON* */
+
+ /* show epitemidxs for fd */
+ /* *INDENT-OFF* */
+ hash_foreach (fd, vepitemidxs,
+ vsm->epitemidxs_by_fd,
+ ({
+ printf("\n{ '%04d': ", fd);
+ vec_foreach (vepitemidxs_var,vepitemidxs)
+ {
+ printf("'%04d' ", (int)vepitemidxs_var[0]);
+ }
+ printf("}\n");
+ }));
+ /* *INDENT-ON* */
+
+ }
+}
+
+void
+vcom_socket_main_destroy (void)
+{
+ vcom_socket_main_t *vsm = &vcom_socket_main;
+ vcom_socket_t *vsock;
+
+ vcom_epoll_t *vepoll;
+
+ vcom_epitem_t *vepitem;
+
+ i32 epfd;
+ i32 fd;
+ i32 *vepitemidxs;
+
+
+ if (VCOM_DEBUG > 0)
+ printf ("vcom_socket_main_destroy\n");
+
+ if (vsm->init)
+ {
+
+ /*
+ * from active list of vepitems,
+ * remove all "vepitem" elements from the pool in a safe way
+ * */
+
+ /* *INDENT-OFF* */
+ pool_flush (vepitem, vsm->vepitems,
+ ({
+ if (vepitem->type == FD_TYPE_EPOLL || FD_TYPE_VCOM_SOCKET)
+ {
+ vcom_socket_epoll_ctl1 (vepitem->epfd, EPOLL_CTL_DEL,
+ vepitem->fd, NULL);
+ vepitem_init (vepitem);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (vsm->vepitems);
+ hash_free (vsm->epitemidx_by_epfdfd);
+
+ /* free vepitemidxs for each epfd */
+ /* *INDENT-OFF* */
+ hash_foreach (epfd, vepitemidxs,
+ vsm->epitemidxs_by_epfd,
+ ({
+ vec_free (vepitemidxs);
+ }));
+ /* *INDENT-ON* */
+ hash_free (vsm->epitemidxs_by_epfd);
+
+ /* free vepitemidxs for each fd */
+ /* *INDENT-OFF* */
+ hash_foreach (fd, vepitemidxs,
+ vsm->epitemidxs_by_fd,
+ ({
+ vec_free (vepitemidxs);
+ }));
+ /* *INDENT-ON* */
+ hash_free (vsm->epitemidxs_by_fd);
+
+
+ /*
+ * from active list of vsockets,
+ * close socket and vppcom session
+ * */
+
+ /* *INDENT-OFF* */
+ pool_foreach (vsock, vsm->vsockets,
+ ({
+ if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND)
+ {
+ vppcom_session_close (vsock->sid);
+ vcom_socket_close_socket (vsock->fd);
+ vsocket_init (vsock);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /*
+ * return vsocket element to the pool
+ * */
+
+ /* *INDENT-OFF* */
+ pool_flush (vsock, vsm->vsockets,
+ ({
+ // vsocket_init(vsock);
+ ;
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (vsm->vsockets);
+ hash_free (vsm->sockidx_by_fd);
+
+ /*
+ * from active list of vepolls,
+ * close epoll and vppcom_epoll
+ * */
+
+ /* *INDENT-OFF* */
+ pool_foreach (vepoll, vsm->vepolls,
+ ({
+ if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND)
+ {
+ vppcom_session_close (vepoll->vep_idx);
+ vcom_socket_close_epoll (vepoll->epfd); /* TBD: */
+ vepoll_init (vepoll);
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /*
+ * return vepoll element to the pool
+ * */
+
+ /* *INDENT-OFF* */
+ pool_flush (vepoll, vsm->vepolls,
+ ({
+ // vepoll_init(vepoll);
+ ;
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (vsm->vepolls);
+ hash_free (vsm->epollidx_by_epfd);
+
+ vsm->init = 0;
+ }
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vcom_socket.h b/src/vcl/vcom_socket.h
new file mode 100644
index 00000000000..4f5e4abfb15
--- /dev/null
+++ b/src/vcl/vcom_socket.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vcom_socket_h
+#define included_vcom_socket_h
+
+#include <string.h>
+
+#include <vcl/vcom_glibc_socket.h>
+#include <vppinfra/types.h>
+#include <sys/socket.h>
+
+#define INVALID_SESSION_ID (~0)
+#define INVALID_FD (~0)
+
+#define INVALID_VEP_IDX INVALID_SESSION_ID
+#define INVALID_EPFD INVALID_FD
+
+typedef enum
+{
+ SOCKET_TYPE_UNBOUND = 0,
+ SOCKET_TYPE_KERNEL_BOUND,
+ SOCKET_TYPE_VPPCOM_BOUND
+} vcom_socket_type_t;
+
+typedef enum
+{
+ EPOLL_TYPE_UNBOUND = 0,
+ EPOLL_TYPE_KERNEL_BOUND,
+ EPOLL_TYPE_VPPCOM_BOUND
+} vcom_epoll_type_t;
+
+typedef enum
+{
+ FD_TYPE_INVALID = 0,
+ FD_TYPE_KERNEL,
+ FD_TYPE_EPOLL,
+ FD_TYPE_VCOM_SOCKET,
+ /* add new types here */
+ /* FD_TYPE_MAX should be the last entry */
+ FD_TYPE_MAX
+} vcom_fd_type_t;
+
+typedef struct
+{
+ /* file descriptor -
+ * fd 0, 1, 2 have special meaning and are reserved,
+ * -1 denote invalid fd */
+ i32 fd;
+
+ /* session id - -1 denote invalid sid */
+ i32 sid;
+
+ /* socket type */
+ vcom_socket_type_t type;
+
+ /* vcom socket attributes here */
+
+} vcom_socket_t;
+
+typedef struct
+{
+ /* epoll file descriptor -
+ * epfd 0, 1, 2 have special meaning and are reserved,
+ * -1 denote invalid epfd */
+ i32 epfd;
+
+ /* vep idx - -1 denote invalid vep_idx */
+ i32 vep_idx;
+
+ /* epoll type */
+ vcom_epoll_type_t type;
+
+ /* flags - 0 or EPOLL_CLOEXEC */
+ i32 flags;
+
+ /* vcom epoll attributes here */
+
+ /*
+ * 00. count of file descriptors currently registered
+ * on this epoll instance.
+ * 01. number of file descriptors in the epoll set.
+ * 02. EPOLL_CTL_ADD, EPOLL_CTL_MOD, EPOLL_CTL_DEL
+ * update the count.
+ * 03. cached for frequent access.
+ * */
+ i32 count;
+
+ /* close( ) called on this epoll instance */
+ /* 0 - close ( ) not called, 1 - close( ) called. */
+ u32 close;
+
+} vcom_epoll_t;
+
+typedef struct
+{
+ /* "container" of this item */
+ i32 epfd;
+
+ /* fd - file descriptor information this item refers to */
+ i32 fd;
+ /* next and prev fd in the "epoll set" of epfd */
+ i32 next_fd;
+ i32 prev_fd;
+
+ /* vcom fd type */
+ vcom_fd_type_t type;
+
+ /* interested events and the source fd */
+ struct epoll_event event;
+
+ /* ready events and the source fd */
+ struct epoll_event revent;
+
+ /* epitem attributes here */
+
+} vcom_epitem_t;
+
+typedef union vcom_epitem_key
+{
+ struct
+ {
+ i32 fd;
+ i32 epfd;
+ };
+ i64 key;
+} __EPOLL_PACKED vcom_epitem_key_t;
+
+static inline char *
+vcom_socket_type_str (vcom_socket_type_t t)
+{
+ switch (t)
+ {
+ case SOCKET_TYPE_UNBOUND:
+ return "SOCKET_TYPE_UNBOUND";
+
+ case SOCKET_TYPE_KERNEL_BOUND:
+ return "SOCKET_TYPE_KERNEL_BOUND";
+
+ case SOCKET_TYPE_VPPCOM_BOUND:
+ return "SOCKET_TYPE_VPPCOM_BOUND";
+
+ default:
+ return "SOCKET_TYPE_UNKNOWN";
+ }
+}
+
+static inline char *
+vcom_socket_epoll_type_str (vcom_epoll_type_t t)
+{
+ switch (t)
+ {
+ case EPOLL_TYPE_UNBOUND:
+ return "EPOLL_TYPE_UNBOUND";
+
+ case EPOLL_TYPE_KERNEL_BOUND:
+ return "EPOLL_TYPE_KERNEL_BOUND";
+
+ case EPOLL_TYPE_VPPCOM_BOUND:
+ return "EPOLL_TYPE_VPPCOM_BOUND";
+
+ default:
+ return "EPOLL_TYPE_UNKNOWN";
+ }
+}
+
+static inline char *
+vcom_socket_vcom_fd_type_str (vcom_fd_type_t t)
+{
+ switch (t)
+ {
+ case FD_TYPE_KERNEL:
+ return "FD_TYPE_KERNEL";
+
+ case FD_TYPE_EPOLL:
+ return "FD_TYPE_EPOLL";
+
+ case FD_TYPE_VCOM_SOCKET:
+ return "FD_TYPE_VCOM_SOCKET";
+
+ default:
+ return "FD_TYPE_UNKNOWN";
+ }
+}
+
+static inline int
+vcom_socket_type_is_vppcom_bound (vcom_socket_type_t t)
+{
+ return t == SOCKET_TYPE_VPPCOM_BOUND;
+}
+
+static inline int
+vcom_socket_epoll_type_is_vppcom_bound (vcom_epoll_type_t t)
+{
+ return t == EPOLL_TYPE_VPPCOM_BOUND;
+}
+
+static inline void
+vsocket_init (vcom_socket_t * vsock)
+{
+ memset (vsock, 0, sizeof (*vsock));
+
+ vsock->fd = INVALID_FD;
+ vsock->sid = INVALID_SESSION_ID;
+ vsock->type = SOCKET_TYPE_UNBOUND;
+ /* vcom socket attributes init here */
+}
+
+static inline void
+vepoll_init (vcom_epoll_t * vepoll)
+{
+ memset (vepoll, 0, sizeof (*vepoll));
+
+ vepoll->epfd = INVALID_EPFD;
+ vepoll->vep_idx = INVALID_VEP_IDX;
+ vepoll->type = EPOLL_TYPE_UNBOUND;
+ vepoll->flags = 0;
+
+ vepoll->count = 0;
+ vepoll->close = 0;
+ /* vcom epoll attributes init here */
+}
+
+static inline void
+vepitem_init (vcom_epitem_t * vepitem)
+{
+ struct epoll_event event = {.events = 0,.data.fd = INVALID_FD };
+
+ memset (vepitem, 0, sizeof (*vepitem));
+
+ vepitem->epfd = INVALID_EPFD;
+
+ vepitem->fd = INVALID_FD;
+ vepitem->next_fd = INVALID_FD;
+ vepitem->prev_fd = INVALID_FD;
+
+ vepitem->type = FD_TYPE_INVALID;
+
+ vepitem->event = event;
+ vepitem->revent = event;
+ /* vepoll attributes init here */
+}
+
+static inline void
+vepitemkey_init (vcom_epitem_key_t * epfdfd)
+{
+ memset (epfdfd, 0, sizeof (*epfdfd));
+
+ epfdfd->epfd = INVALID_EPFD;
+ epfdfd->fd = INVALID_FD;
+}
+
+static inline void
+vsocket_set (vcom_socket_t * vsock, i32 fd, i32 sid, vcom_socket_type_t type)
+{
+ vsock->fd = fd;
+ vsock->sid = sid;
+ vsock->type = type;
+ /* vcom socket attributes set here */
+}
+
+static inline void
+vepoll_set (vcom_epoll_t * vepoll,
+ i32 epfd, i32 vep_idx,
+ vcom_epoll_type_t type, i32 flags, i32 count, u32 close)
+{
+ vepoll->epfd = epfd;
+ vepoll->vep_idx = vep_idx;
+ vepoll->type = type;
+ vepoll->flags = flags;
+
+ vepoll->count = count;
+ vepoll->close = close;
+ /* vcom epoll attributes set here */
+}
+
+static inline void
+vepitem_set (vcom_epitem_t * vepitem,
+ i32 epfd,
+ i32 fd, i32 next_fd, i32 prev_fd,
+ vcom_fd_type_t type,
+ struct epoll_event event, struct epoll_event revent)
+{
+ vepitem->epfd = epfd;
+
+ vepitem->fd = fd;
+ vepitem->next_fd = next_fd;
+ vepitem->prev_fd = prev_fd;
+
+ vepitem->type = type;
+
+ vepitem->event = event;
+ vepitem->revent = revent;
+ /* vcom epitem attributes set here */
+}
+
+static inline void
+vepitemkey_set (vcom_epitem_key_t * epfdfd, i32 epfd, i32 fd)
+{
+ epfdfd->epfd = epfd;
+ epfdfd->fd = fd;
+}
+
+static inline int
+vsocket_is_vppcom_bound (vcom_socket_t * vsock)
+{
+ return vcom_socket_type_is_vppcom_bound (vsock->type);
+}
+
+static inline int
+vepoll_is_vppcom_bound (vcom_epoll_t * vepoll)
+{
+ return vcom_socket_epoll_type_is_vppcom_bound (vepoll->type);
+}
+
+int vcom_socket_main_init (void);
+
+void vcom_socket_main_destroy (void);
+
+void vcom_socket_main_show (void);
+
+int vcom_socket_is_vcom_fd (int fd);
+
+int vcom_socket_is_vcom_epfd (int epfd);
+
+int vcom_socket_close (int __fd);
+
+ssize_t vcom_socket_read (int __fd, void *__buf, size_t __nbytes);
+
+ssize_t vcom_socket_readv (int __fd, const struct iovec *__iov, int __iovcnt);
+
+ssize_t vcom_socket_write (int __fd, const void *__buf, size_t __n);
+
+ssize_t vcom_socket_writev (int __fd, const struct iovec *__iov,
+ int __iovcnt);
+
+int vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap);
+
+int vcom_socket_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap);
+
+int
+vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds,
+ fd_set * __restrict vcom_writefds,
+ fd_set * __restrict vcom_exceptfds,
+ struct timeval *__restrict timeout);
+
+
+int vcom_socket_socket (int __domain, int __type, int __protocol);
+
+int
+vcom_socket_socketpair (int __domain, int __type, int __protocol,
+ int __fds[2]);
+
+int vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len);
+
+int
+vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len);
+
+int
+vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len);
+
+int
+vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __len);
+
+ssize_t
+vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags);
+
+ssize_t vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags);
+
+/*
+ * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET),
+ * 0 otherwise
+ * */
+int vcom_socket_is_connection_mode_socket (int __fd);
+
+ssize_t
+vcom_socket_sendto (int __fd, const void *__buf, size_t __n,
+ int __flags, __CONST_SOCKADDR_ARG __addr,
+ socklen_t __addr_len);
+
+ssize_t
+vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n,
+ int __flags, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len);
+
+ssize_t
+vcom_socket_sendmsg (int __fd, const struct msghdr *__message, int __flags);
+
+#ifdef __USE_GNU
+int
+vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags);
+#endif
+
+ssize_t vcom_socket_recvmsg (int __fd, struct msghdr *__message, int __flags);
+
+#ifdef __USE_GNU
+int
+vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags,
+ struct timespec *__tmo);
+#endif
+
+int
+vcom_socket_getsockopt (int __fd, int __level, int __optname,
+ void *__restrict __optval,
+ socklen_t * __restrict __optlen);
+
+int
+vcom_socket_setsockopt (int __fd, int __level, int __optname,
+ const void *__optval, socklen_t __optlen);
+
+int vcom_socket_listen (int __fd, int __n);
+
+int
+vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len);
+
+#ifdef __USE_GNU
+int
+vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr,
+ socklen_t * __restrict __addr_len, int __flags);
+#endif
+
+int vcom_socket_shutdown (int __fd, int __how);
+
+int vcom_socket_epoll_create1 (int __flags);
+
+int
+vcom_socket_epoll_ctl (int __epfd, int __op, int __fd,
+ struct epoll_event *__event);
+
+int
+vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout,
+ const __sigset_t * __ss);
+
+/*
+ * handle only vcom fds
+ */
+int vcom_socket_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
+
+#ifdef __USE_GNU
+int
+vcom_socket_ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss);
+#endif
+
+#endif /* included_vcom_socket_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vcom_socket_wrapper.c b/src/vcl/vcom_socket_wrapper.c
new file mode 100644
index 00000000000..19a775de99e
--- /dev/null
+++ b/src/vcl/vcom_socket_wrapper.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org>
+ * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org>
+ * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ Socket wrapper library. Passes all socket communication over
+ unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
+ is set.
+*/
+
+#include <signal.h>
+#include <dlfcn.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <vcl/vcom_socket_wrapper.h>
+
+
+enum swrap_dbglvl_e
+{
+ SWRAP_LOG_ERROR = 0,
+ SWRAP_LOG_WARN,
+ SWRAP_LOG_DEBUG,
+ SWRAP_LOG_TRACE
+};
+
+
+/* Macros for accessing mutexes */
+#define SWRAP_LOCK(m) do { \
+ pthread_mutex_lock(&(m ## _mutex)); \
+} while(0)
+
+#define SWRAP_UNLOCK(m) do { \
+ pthread_mutex_unlock(&(m ## _mutex)); \
+} while(0)
+
+/* Add new global locks here please */
+#define SWRAP_LOCK_ALL \
+ SWRAP_LOCK(libc_symbol_binding); \
+
+#define SWRAP_UNLOCK_ALL \
+ SWRAP_UNLOCK(libc_symbol_binding); \
+
+
+
+/* The mutex for accessing the global libc.symbols */
+static pthread_mutex_t libc_symbol_binding_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Function prototypes */
+
+#ifdef NDEBUG
+#define SWRAP_LOG(...)
+#else
+static void
+swrap_log (enum swrap_dbglvl_e dbglvl, const char *func,
+ const char *format, ...)
+PRINTF_ATTRIBUTE (3, 4);
+#define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__)
+
+ static void
+ swrap_log (enum swrap_dbglvl_e dbglvl,
+ const char *func, const char *format, ...)
+{
+ char buffer[1024];
+ va_list va;
+ unsigned int lvl = SWRAP_LOG_WARN;
+
+ va_start (va, format);
+ vsnprintf (buffer, sizeof (buffer), format, va);
+ va_end (va);
+
+ if (lvl >= dbglvl)
+ {
+ switch (dbglvl)
+ {
+ case SWRAP_LOG_ERROR:
+ fprintf (stderr,
+ "SWRAP_ERROR(%d) - %s: %s\n",
+ (int) getpid (), func, buffer);
+ break;
+ case SWRAP_LOG_WARN:
+ fprintf (stderr,
+ "SWRAP_WARN(%d) - %s: %s\n",
+ (int) getpid (), func, buffer);
+ break;
+ case SWRAP_LOG_DEBUG:
+ fprintf (stderr,
+ "SWRAP_DEBUG(%d) - %s: %s\n",
+ (int) getpid (), func, buffer);
+ break;
+ case SWRAP_LOG_TRACE:
+ fprintf (stderr,
+ "SWRAP_TRACE(%d) - %s: %s\n",
+ (int) getpid (), func, buffer);
+ break;
+ }
+ }
+}
+#endif
+
+
+/*********************************************************
+ * SWRAP LOADING LIBC FUNCTIONS
+ *********************************************************/
+
+#ifdef HAVE_ACCEPT4
+typedef int (*__libc_accept4) (int sockfd,
+ struct sockaddr * addr,
+ socklen_t * addrlen, int flags);
+#else
+typedef int (*__libc_accept) (int sockfd,
+ struct sockaddr * addr, socklen_t * addrlen);
+#endif
+typedef int (*__libc_bind) (int sockfd,
+ const struct sockaddr * addr, socklen_t addrlen);
+typedef int (*__libc_close) (int fd);
+typedef int (*__libc_connect) (int sockfd,
+ const struct sockaddr * addr,
+ socklen_t addrlen);
+
+#if 0
+/* TBD: dup and dup2 to be implemented later */
+typedef int (*__libc_dup) (int fd);
+typedef int (*__libc_dup2) (int oldfd, int newfd);
+#endif
+
+typedef int (*__libc_fcntl) (int fd, int cmd, ...);
+typedef FILE *(*__libc_fopen) (const char *name, const char *mode);
+#ifdef HAVE_FOPEN64
+typedef FILE *(*__libc_fopen64) (const char *name, const char *mode);
+#endif
+#ifdef HAVE_EVENTFD
+typedef int (*__libc_eventfd) (int count, int flags);
+#endif
+typedef int (*__libc_getpeername) (int sockfd,
+ struct sockaddr * addr,
+ socklen_t * addrlen);
+typedef int (*__libc_getsockname) (int sockfd,
+ struct sockaddr * addr,
+ socklen_t * addrlen);
+typedef int (*__libc_getsockopt) (int sockfd,
+ int level,
+ int optname,
+ void *optval, socklen_t * optlen);
+typedef int (*__libc_ioctl) (int d, unsigned long int request, ...);
+typedef int (*__libc_listen) (int sockfd, int backlog);
+typedef int (*__libc_open) (const char *pathname, int flags, mode_t mode);
+#ifdef HAVE_OPEN64
+typedef int (*__libc_open64) (const char *pathname, int flags, mode_t mode);
+#endif /* HAVE_OPEN64 */
+typedef int (*__libc_openat) (int dirfd, const char *path, int flags, ...);
+typedef int (*__libc_pipe) (int pipefd[2]);
+typedef int (*__libc_read) (int fd, void *buf, size_t count);
+typedef ssize_t (*__libc_readv) (int fd, const struct iovec * iov,
+ int iovcnt);
+typedef int (*__libc_recv) (int sockfd, void *buf, size_t len, int flags);
+typedef int (*__libc_recvfrom) (int sockfd,
+ void *buf,
+ size_t len,
+ int flags,
+ struct sockaddr * src_addr,
+ socklen_t * addrlen);
+typedef int (*__libc_recvmsg) (int sockfd, const struct msghdr * msg,
+ int flags);
+typedef int (*__libc_send) (int sockfd, const void *buf, size_t len,
+ int flags);
+typedef int (*__libc_sendmsg) (int sockfd, const struct msghdr * msg,
+ int flags);
+typedef int (*__libc_sendto) (int sockfd, const void *buf, size_t len,
+ int flags, const struct sockaddr * dst_addr,
+ socklen_t addrlen);
+typedef int (*__libc_setsockopt) (int sockfd, int level, int optname,
+ const void *optval, socklen_t optlen);
+#ifdef HAVE_SIGNALFD
+typedef int (*__libc_signalfd) (int fd, const sigset_t * mask, int flags);
+#endif
+typedef int (*__libc_socket) (int domain, int type, int protocol);
+typedef int (*__libc_socketpair) (int domain, int type, int protocol,
+ int sv[2]);
+#ifdef HAVE_TIMERFD_CREATE
+typedef int (*__libc_timerfd_create) (int clockid, int flags);
+#endif
+typedef ssize_t (*__libc_write) (int fd, const void *buf, size_t count);
+typedef ssize_t (*__libc_writev) (int fd, const struct iovec * iov,
+ int iovcnt);
+
+typedef int (*__libc_shutdown) (int fd, int how);
+
+typedef int (*__libc_select) (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval * __restrict __timeout);
+
+#ifdef __USE_XOPEN2K
+typedef int (*__libc_pselect) (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec * __restrict __timeout,
+ const __sigset_t * __restrict __sigmask);
+#endif
+
+typedef int (*__libc_epoll_create) (int __size);
+
+typedef int (*__libc_epoll_create1) (int __flags);
+
+typedef int (*__libc_epoll_ctl) (int __epfd, int __op, int __fd,
+ struct epoll_event * __event);
+
+typedef int (*__libc_epoll_wait) (int __epfd, struct epoll_event * __events,
+ int __maxevents, int __timeout);
+
+typedef int (*__libc_epoll_pwait) (int __epfd, struct epoll_event * __events,
+ int __maxevents, int __timeout,
+ const __sigset_t * __ss);
+
+typedef int (*__libc_poll) (struct pollfd * __fds, nfds_t __nfds,
+ int __timeout);
+
+#ifdef __USE_GNU
+typedef int (*__libc_ppoll) (struct pollfd * __fds, nfds_t __nfds,
+ const struct timespec * __timeout,
+ const __sigset_t * __ss);
+#endif
+
+
+#define SWRAP_SYMBOL_ENTRY(i) \
+ union { \
+ __libc_##i f; \
+ void *obj; \
+ } _libc_##i
+
+struct swrap_libc_symbols
+{
+#ifdef HAVE_ACCEPT4
+ SWRAP_SYMBOL_ENTRY (accept4);
+#else
+ SWRAP_SYMBOL_ENTRY (accept);
+#endif
+ SWRAP_SYMBOL_ENTRY (bind);
+ SWRAP_SYMBOL_ENTRY (close);
+ SWRAP_SYMBOL_ENTRY (connect);
+#if 0
+ /* TBD: dup and dup2 to be implemented later */
+ SWRAP_SYMBOL_ENTRY (dup);
+ SWRAP_SYMBOL_ENTRY (dup2);
+#endif
+ SWRAP_SYMBOL_ENTRY (fcntl);
+ SWRAP_SYMBOL_ENTRY (fopen);
+#ifdef HAVE_FOPEN64
+ SWRAP_SYMBOL_ENTRY (fopen64);
+#endif
+#ifdef HAVE_EVENTFD
+ SWRAP_SYMBOL_ENTRY (eventfd);
+#endif
+ SWRAP_SYMBOL_ENTRY (getpeername);
+ SWRAP_SYMBOL_ENTRY (getsockname);
+ SWRAP_SYMBOL_ENTRY (getsockopt);
+ SWRAP_SYMBOL_ENTRY (ioctl);
+ SWRAP_SYMBOL_ENTRY (listen);
+ SWRAP_SYMBOL_ENTRY (open);
+#ifdef HAVE_OPEN64
+ SWRAP_SYMBOL_ENTRY (open64);
+#endif
+ SWRAP_SYMBOL_ENTRY (openat);
+ SWRAP_SYMBOL_ENTRY (pipe);
+ SWRAP_SYMBOL_ENTRY (read);
+ SWRAP_SYMBOL_ENTRY (readv);
+ SWRAP_SYMBOL_ENTRY (recv);
+ SWRAP_SYMBOL_ENTRY (recvfrom);
+ SWRAP_SYMBOL_ENTRY (recvmsg);
+ SWRAP_SYMBOL_ENTRY (send);
+ SWRAP_SYMBOL_ENTRY (sendmsg);
+ SWRAP_SYMBOL_ENTRY (sendto);
+ SWRAP_SYMBOL_ENTRY (setsockopt);
+#ifdef HAVE_SIGNALFD
+ SWRAP_SYMBOL_ENTRY (signalfd);
+#endif
+ SWRAP_SYMBOL_ENTRY (socket);
+ SWRAP_SYMBOL_ENTRY (socketpair);
+#ifdef HAVE_TIMERFD_CREATE
+ SWRAP_SYMBOL_ENTRY (timerfd_create);
+#endif
+ SWRAP_SYMBOL_ENTRY (write);
+ SWRAP_SYMBOL_ENTRY (writev);
+
+ SWRAP_SYMBOL_ENTRY (shutdown);
+ SWRAP_SYMBOL_ENTRY (select);
+#ifdef __USE_XOPEN2K
+ SWRAP_SYMBOL_ENTRY (pselect);
+#endif
+ SWRAP_SYMBOL_ENTRY (epoll_create);
+ SWRAP_SYMBOL_ENTRY (epoll_create1);
+ SWRAP_SYMBOL_ENTRY (epoll_ctl);
+ SWRAP_SYMBOL_ENTRY (epoll_wait);
+ SWRAP_SYMBOL_ENTRY (epoll_pwait);
+ SWRAP_SYMBOL_ENTRY (poll);
+#ifdef __USE_GNU
+ SWRAP_SYMBOL_ENTRY (ppoll);
+#endif
+};
+
+struct swrap
+{
+ struct
+ {
+ void *handle;
+ void *socket_handle;
+ struct swrap_libc_symbols symbols;
+ } libc;
+};
+
+static struct swrap swrap;
+
+#define LIBC_NAME "libc.so"
+
+enum swrap_lib
+{
+ SWRAP_LIBC,
+};
+
+#ifndef NDEBUG
+static const char *
+swrap_str_lib (enum swrap_lib lib)
+{
+ switch (lib)
+ {
+ case SWRAP_LIBC:
+ return "libc";
+ }
+
+ /* Compiler would warn us about unhandled enum value if we get here */
+ return "unknown";
+}
+#endif
+
+static void *
+swrap_load_lib_handle (enum swrap_lib lib)
+{
+ int flags = RTLD_LAZY;
+ void *handle = NULL;
+ int i;
+
+#ifdef RTLD_DEEPBIND
+ flags |= RTLD_DEEPBIND;
+#endif
+
+ switch (lib)
+ {
+ case SWRAP_LIBC:
+ handle = swrap.libc.handle;
+#ifdef LIBC_SO
+ if (handle == NULL)
+ {
+ handle = dlopen (LIBC_SO, flags);
+
+ swrap.libc.handle = handle;
+ }
+#endif
+ if (handle == NULL)
+ {
+ for (i = 10; i >= 0; i--)
+ {
+ char soname[256] = { 0 };
+
+ snprintf (soname, sizeof (soname), "libc.so.%d", i);
+ handle = dlopen (soname, flags);
+ if (handle != NULL)
+ {
+ break;
+ }
+ }
+
+ swrap.libc.handle = handle;
+ }
+ break;
+ }
+
+ if (handle == NULL)
+ {
+ SWRAP_LOG (SWRAP_LOG_ERROR,
+ "Failed to dlopen library: %s\n", dlerror ());
+ exit (-1);
+ }
+
+ return handle;
+}
+
+static void *
+_swrap_bind_symbol (enum swrap_lib lib, const char *fn_name)
+{
+ void *handle;
+ void *func;
+
+ handle = swrap_load_lib_handle (lib);
+
+ func = dlsym (handle, fn_name);
+ if (func == NULL)
+ {
+ SWRAP_LOG (SWRAP_LOG_ERROR,
+ "Failed to find %s: %s\n", fn_name, dlerror ());
+ exit (-1);
+ }
+
+ SWRAP_LOG (SWRAP_LOG_TRACE,
+ "Loaded %s from %s", fn_name, swrap_str_lib (lib));
+
+ return func;
+}
+
+#define swrap_bind_symbol_libc(sym_name) \
+ SWRAP_LOCK(libc_symbol_binding); \
+ if (swrap.libc.symbols._libc_##sym_name.obj == NULL) { \
+ swrap.libc.symbols._libc_##sym_name.obj = \
+ _swrap_bind_symbol(SWRAP_LIBC, #sym_name); \
+ } \
+ SWRAP_UNLOCK(libc_symbol_binding)
+
+/*
+ * IMPORTANT
+ *
+ * Functions especially from libc need to be loaded individually, you can't load
+ * all at once or gdb will segfault at startup. The same applies to valgrind and
+ * has probably something todo with with the linker.
+ * So we need load each function at the point it is called the first time.
+ */
+#ifdef HAVE_ACCEPT4
+int
+libc_accept4 (int sockfd,
+ struct sockaddr *addr, socklen_t * addrlen, int flags)
+{
+ swrap_bind_symbol_libc (accept4);
+
+ return swrap.libc.symbols._libc_accept4.f (sockfd, addr, addrlen, flags);
+}
+
+#else /* HAVE_ACCEPT4 */
+
+int
+libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
+{
+ swrap_bind_symbol_libc (accept);
+
+ return swrap.libc.symbols._libc_accept.f (sockfd, addr, addrlen);
+}
+#endif /* HAVE_ACCEPT4 */
+
+int
+libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+ swrap_bind_symbol_libc (bind);
+
+ return swrap.libc.symbols._libc_bind.f (sockfd, addr, addrlen);
+}
+
+int
+libc_close (int fd)
+{
+ swrap_bind_symbol_libc (close);
+
+ return swrap.libc.symbols._libc_close.f (fd);
+}
+
+int
+libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+ swrap_bind_symbol_libc (connect);
+
+ return swrap.libc.symbols._libc_connect.f (sockfd, addr, addrlen);
+}
+
+#if 0
+/* TBD: dup and dup2 to be implemented later */
+int
+libc_dup (int fd)
+{
+ swrap_bind_symbol_libc (dup);
+
+ return swrap.libc.symbols._libc_dup.f (fd);
+}
+
+int
+libc_dup2 (int oldfd, int newfd)
+{
+ swrap_bind_symbol_libc (dup2);
+
+ return swrap.libc.symbols._libc_dup2.f (oldfd, newfd);
+}
+#endif
+
+#ifdef HAVE_EVENTFD
+int
+libc_eventfd (int count, int flags)
+{
+ swrap_bind_symbol_libc (eventfd);
+
+ return swrap.libc.symbols._libc_eventfd.f (count, flags);
+}
+#endif
+
+DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int
+libc_vfcntl (int fd, int cmd, va_list ap)
+{
+ long int args[4];
+ int rc;
+ int i;
+
+ swrap_bind_symbol_libc (fcntl);
+
+ for (i = 0; i < 4; i++)
+ {
+ args[i] = va_arg (ap, long int);
+ }
+
+ rc = swrap.libc.symbols._libc_fcntl.f (fd,
+ cmd,
+ args[0], args[1], args[2], args[3]);
+
+ return rc;
+}
+
+DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int
+libc_vioctl (int fd, int cmd, va_list ap)
+{
+ long int args[4];
+ int rc;
+ int i;
+
+ swrap_bind_symbol_libc (ioctl);
+
+ for (i = 0; i < 4; i++)
+ {
+ args[i] = va_arg (ap, long int);
+ }
+
+ rc = swrap.libc.symbols._libc_ioctl.f (fd,
+ cmd,
+ args[0], args[1], args[2], args[3]);
+
+ return rc;
+}
+
+int
+libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
+{
+ swrap_bind_symbol_libc (getpeername);
+
+ return swrap.libc.symbols._libc_getpeername.f (sockfd, addr, addrlen);
+}
+
+int
+libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
+{
+ swrap_bind_symbol_libc (getsockname);
+
+ return swrap.libc.symbols._libc_getsockname.f (sockfd, addr, addrlen);
+}
+
+int
+libc_getsockopt (int sockfd,
+ int level, int optname, void *optval, socklen_t * optlen)
+{
+ swrap_bind_symbol_libc (getsockopt);
+
+ return swrap.libc.symbols._libc_getsockopt.f (sockfd,
+ level,
+ optname, optval, optlen);
+}
+
+int
+libc_listen (int sockfd, int backlog)
+{
+ swrap_bind_symbol_libc (listen);
+
+ return swrap.libc.symbols._libc_listen.f (sockfd, backlog);
+}
+
+int
+libc_read (int fd, void *buf, size_t count)
+{
+ swrap_bind_symbol_libc (read);
+
+ return swrap.libc.symbols._libc_read.f (fd, buf, count);
+}
+
+ssize_t
+libc_readv (int fd, const struct iovec * iov, int iovcnt)
+{
+ swrap_bind_symbol_libc (readv);
+
+ return swrap.libc.symbols._libc_readv.f (fd, iov, iovcnt);
+}
+
+int
+libc_recv (int sockfd, void *buf, size_t len, int flags)
+{
+ swrap_bind_symbol_libc (recv);
+
+ return swrap.libc.symbols._libc_recv.f (sockfd, buf, len, flags);
+}
+
+int
+libc_recvfrom (int sockfd,
+ void *buf,
+ size_t len,
+ int flags, struct sockaddr *src_addr, socklen_t * addrlen)
+{
+ swrap_bind_symbol_libc (recvfrom);
+
+ return swrap.libc.symbols._libc_recvfrom.f (sockfd,
+ buf,
+ len, flags, src_addr, addrlen);
+}
+
+int
+libc_recvmsg (int sockfd, struct msghdr *msg, int flags)
+{
+ swrap_bind_symbol_libc (recvmsg);
+
+ return swrap.libc.symbols._libc_recvmsg.f (sockfd, msg, flags);
+}
+
+int
+libc_send (int sockfd, const void *buf, size_t len, int flags)
+{
+ swrap_bind_symbol_libc (send);
+
+ return swrap.libc.symbols._libc_send.f (sockfd, buf, len, flags);
+}
+
+int
+libc_sendmsg (int sockfd, const struct msghdr *msg, int flags)
+{
+ swrap_bind_symbol_libc (sendmsg);
+
+ return swrap.libc.symbols._libc_sendmsg.f (sockfd, msg, flags);
+}
+
+int
+libc_sendto (int sockfd,
+ const void *buf,
+ size_t len,
+ int flags, const struct sockaddr *dst_addr, socklen_t addrlen)
+{
+ swrap_bind_symbol_libc (sendto);
+
+ return swrap.libc.symbols._libc_sendto.f (sockfd,
+ buf,
+ len, flags, dst_addr, addrlen);
+}
+
+int
+libc_setsockopt (int sockfd,
+ int level, int optname, const void *optval, socklen_t optlen)
+{
+ swrap_bind_symbol_libc (setsockopt);
+
+ return swrap.libc.symbols._libc_setsockopt.f (sockfd,
+ level,
+ optname, optval, optlen);
+}
+
+int
+libc_socket (int domain, int type, int protocol)
+{
+ swrap_bind_symbol_libc (socket);
+
+ return swrap.libc.symbols._libc_socket.f (domain, type, protocol);
+}
+
+int
+libc_socketpair (int domain, int type, int protocol, int sv[2])
+{
+ swrap_bind_symbol_libc (socketpair);
+
+ return swrap.libc.symbols._libc_socketpair.f (domain, type, protocol, sv);
+}
+
+ssize_t
+libc_write (int fd, const void *buf, size_t count)
+{
+ swrap_bind_symbol_libc (write);
+
+ return swrap.libc.symbols._libc_write.f (fd, buf, count);
+}
+
+ssize_t
+libc_writev (int fd, const struct iovec * iov, int iovcnt)
+{
+ swrap_bind_symbol_libc (writev);
+
+ return swrap.libc.symbols._libc_writev.f (fd, iov, iovcnt);
+}
+
+int
+libc_shutdown (int fd, int how)
+{
+ swrap_bind_symbol_libc (shutdown);
+
+ return swrap.libc.symbols._libc_shutdown.f (fd, how);
+}
+
+int
+libc_select (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval *__restrict __timeout)
+{
+ swrap_bind_symbol_libc (select);
+
+ return swrap.libc.symbols._libc_select.f (__nfds, __readfds,
+ __writefds,
+ __exceptfds, __timeout);
+}
+
+#ifdef __USE_XOPEN2K
+int
+libc_pselect (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec *__restrict __timeout,
+ const __sigset_t * __restrict __sigmask)
+{
+ swrap_bind_symbol_libc (pselect);
+
+ return swrap.libc.symbols._libc_pselect.f (__nfds, __readfds,
+ __writefds,
+ __exceptfds,
+ __timeout, __sigmask);
+}
+#endif
+
+int
+libc_epoll_create (int __size)
+{
+ swrap_bind_symbol_libc (epoll_create);
+
+ return swrap.libc.symbols._libc_epoll_create.f (__size);
+}
+
+int
+libc_epoll_create1 (int __flags)
+{
+ swrap_bind_symbol_libc (epoll_create1);
+
+ return swrap.libc.symbols._libc_epoll_create1.f (__flags);
+}
+
+int
+libc_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
+{
+ swrap_bind_symbol_libc (epoll_ctl);
+
+ return swrap.libc.symbols._libc_epoll_ctl.f (__epfd, __op, __fd, __event);
+}
+
+int
+libc_epoll_wait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout)
+{
+ swrap_bind_symbol_libc (epoll_wait);
+
+ return swrap.libc.symbols._libc_epoll_wait.f (__epfd, __events,
+ __maxevents, __timeout);
+}
+
+int
+libc_epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout, const __sigset_t * __ss)
+{
+ swrap_bind_symbol_libc (epoll_pwait);
+
+ return swrap.libc.symbols._libc_epoll_pwait.f (__epfd, __events,
+ __maxevents, __timeout,
+ __ss);
+}
+
+int
+libc_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
+{
+ swrap_bind_symbol_libc (poll);
+
+ return swrap.libc.symbols._libc_poll.f (__fds, __nfds, __timeout);
+}
+
+#ifdef __USE_GNU
+int
+libc_ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss)
+{
+ swrap_bind_symbol_libc (ppoll);
+
+ return swrap.libc.symbols._libc_ppoll.f (__fds, __nfds, __timeout, __ss);
+}
+#endif
+
+static void
+swrap_thread_prepare (void)
+{
+ SWRAP_LOCK_ALL;
+}
+
+static void
+swrap_thread_parent (void)
+{
+ SWRAP_UNLOCK_ALL;
+}
+
+static void
+swrap_thread_child (void)
+{
+ SWRAP_UNLOCK_ALL;
+}
+
+/****************************
+ * CONSTRUCTOR
+ ***************************/
+void
+swrap_constructor (void)
+{
+ /*
+ * If we hold a lock and the application forks, then the child
+ * is not able to unlock the mutex and we are in a deadlock.
+ * This should prevent such deadlocks.
+ */
+ pthread_atfork (&swrap_thread_prepare,
+ &swrap_thread_parent, &swrap_thread_child);
+}
+
+/****************************
+ * DESTRUCTOR
+ ***************************/
+
+/*
+ * This function is called when the library is unloaded and makes sure that
+ * sockets get closed and the unix file for the socket are unlinked.
+ */
+void
+swrap_destructor (void)
+{
+ if (swrap.libc.handle != NULL)
+ {
+ dlclose (swrap.libc.handle);
+ }
+ if (swrap.libc.socket_handle)
+ {
+ dlclose (swrap.libc.socket_handle);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vcl/vcom_socket_wrapper.h b/src/vcl/vcom_socket_wrapper.h
new file mode 100644
index 00000000000..9e85ecf2b6c
--- /dev/null
+++ b/src/vcl/vcom_socket_wrapper.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org>
+ * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org>
+ * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ Socket wrapper library. Passes all socket communication over
+ unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
+ is set.
+*/
+
+#ifndef included_vcom_socket_wrapper_h
+#define included_vcom_socket_wrapper_h
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/epoll.h>
+#include <poll.h>
+#include <sys/uio.h>
+#include <stdlib.h>
+
+
+/* GCC have printf type attribute check. */
+#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
+#define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b)))
+#else
+#define PRINTF_ATTRIBUTE(a,b)
+#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE
+#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
+#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
+#else
+#define CONSTRUCTOR_ATTRIBUTE
+#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
+
+#define HAVE_DESTRUCTOR_ATTRIBUTE
+#ifdef HAVE_DESTRUCTOR_ATTRIBUTE
+#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
+#else
+#define DESTRUCTOR_ATTRIBUTE
+#endif
+
+#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE
+#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
+#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address))
+#else
+#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
+#endif
+
+/*
+ * IMPORTANT
+ *
+ * Functions especially from libc need to be loaded individually, you can't load
+ * all at once or gdb will segfault at startup. The same applies to valgrind and
+ * has probably something todo with with the linker.
+ * So we need load each function at the point it is called the first time.
+ */
+#ifdef HAVE_ACCEPT4
+int
+libc_accept4 (int sockfd,
+ struct sockaddr *addr, socklen_t * addrlen, int flags);
+#else /* HAVE_ACCEPT4 */
+int libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen);
+#endif /* HAVE_ACCEPT4 */
+
+int libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+
+int libc_close (int fd);
+
+int libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+
+#if 0
+/* TBD: dup and dup2 to be implemented later */
+int libc_dup (int fd);
+
+int libc_dup2 (int oldfd, int newfd);
+#endif
+
+#ifdef HAVE_EVENTFD
+int libc_eventfd (int count, int flags);
+#endif
+
+DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int
+libc_vfcntl (int fd, int cmd, va_list ap);
+
+DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int
+libc_vioctl (int fd, int cmd, va_list ap);
+
+int libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen);
+
+int libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen);
+
+int
+libc_getsockopt (int sockfd,
+ int level, int optname, void *optval, socklen_t * optlen);
+
+int libc_listen (int sockfd, int backlog);
+
+int libc_read (int fd, void *buf, size_t count);
+
+ssize_t libc_readv (int fd, const struct iovec *iov, int iovcnt);
+
+int libc_recv (int sockfd, void *buf, size_t len, int flags);
+
+int
+libc_recvfrom (int sockfd,
+ void *buf,
+ size_t len,
+ int flags, struct sockaddr *src_addr, socklen_t * addrlen);
+
+int libc_recvmsg (int sockfd, struct msghdr *msg, int flags);
+
+int libc_send (int sockfd, const void *buf, size_t len, int flags);
+
+int libc_sendmsg (int sockfd, const struct msghdr *msg, int flags);
+
+int
+libc_sendto (int sockfd,
+ const void *buf,
+ size_t len,
+ int flags, const struct sockaddr *dst_addr, socklen_t addrlen);
+
+int
+libc_setsockopt (int sockfd,
+ int level, int optname, const void *optval,
+ socklen_t optlen);
+
+int libc_socket (int domain, int type, int protocol);
+
+int libc_socketpair (int domain, int type, int protocol, int sv[2]);
+
+ssize_t libc_write (int fd, const void *buf, size_t count);
+
+ssize_t libc_writev (int fd, const struct iovec *iov, int iovcnt);
+
+int libc_shutdown (int fd, int how);
+
+int
+libc_select (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ struct timeval *__restrict __timeout);
+
+#ifdef __USE_XOPEN2K
+int
+libc_pselect (int __nfds, fd_set * __restrict __readfds,
+ fd_set * __restrict __writefds,
+ fd_set * __restrict __exceptfds,
+ const struct timespec *__restrict __timeout,
+ const __sigset_t * __restrict __sigmask);
+#endif
+
+int libc_epoll_create (int __size);
+
+int libc_epoll_create1 (int __flags);
+
+int libc_epoll_ctl (int __epfd, int __op, int __fd,
+ struct epoll_event *__event);
+
+int libc_epoll_wait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout);
+
+int libc_epoll_pwait (int __epfd, struct epoll_event *__events,
+ int __maxevents, int __timeout,
+ const __sigset_t * __ss);
+
+int libc_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
+
+#ifdef __USE_GNU
+int libc_ppoll (struct pollfd *__fds, nfds_t __nfds,
+ const struct timespec *__timeout, const __sigset_t * __ss);
+#endif
+
+void swrap_constructor (void);
+
+void swrap_destructor (void);
+
+#endif /* included_vcom_socket_wrapper_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri/vppcom.c b/src/vcl/vppcom.c
index 24475b46606..0f30c60c803 100644
--- a/src/uri/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -20,7 +20,7 @@
#include <vlibmemory/api.h>
#include <vpp/api/vpe_msg_enum.h>
#include <vnet/session/application_interface.h>
-#include <uri/vppcom.h>
+#include <vcl/vppcom.h>
#include <vlib/unix/unix.h>
#include <vppinfra/vec_bootstrap.h>
diff --git a/src/uri/vppcom.h b/src/vcl/vppcom.h
index dd72986ec53..dd72986ec53 100644
--- a/src/uri/vppcom.h
+++ b/src/vcl/vppcom.h
diff --git a/src/uri/vppcom_test.conf b/src/vcl/vppcom_test.conf
index e5ac46363f8..e5ac46363f8 100644
--- a/src/uri/vppcom_test.conf
+++ b/src/vcl/vppcom_test.conf